v6: Update associated files database when git has staged changes to pointer files

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2018-08-21 16:48:20 -04:00
parent 056b081280
commit 5e56d9b620
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 86 additions and 10 deletions

View file

@ -11,6 +11,8 @@ git-annex (6.20180808) UNRELEASED; urgency=medium
pipe. This also avoids git buffering the whole file content in memory. pipe. This also avoids git buffering the whole file content in memory.
* v6: After updating the worktree for an add/drop, update git's index, * v6: After updating the worktree for an add/drop, update git's index,
so git status will not show the files as modified. so git status will not show the files as modified.
* v6: Update associated files database when git has staged changes
to pointer files.
* linux standalone: When LOCPATH is already set, use it instead of the * linux standalone: When LOCPATH is already set, use it instead of the
bundled locales. It can be set to an empty string to use the system bundled locales. It can be set to an empty string to use the system
locales too. locales too.

View file

@ -1,6 +1,6 @@
{- Sqlite database of information about Keys {- Sqlite database of information about Keys
- -
- Copyright 2015-2016 Joey Hess <id@joeyh.name> - Copyright 2015-2018 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -31,9 +31,13 @@ import Annex.Common hiding (delete)
import Annex.Version (versionUsesKeysDatabase) import Annex.Version (versionUsesKeysDatabase)
import qualified Annex import qualified Annex
import Annex.LockFile import Annex.LockFile
import Annex.CatFile
import Utility.InodeCache import Utility.InodeCache
import Annex.InodeSentinal import Annex.InodeSentinal
import Git
import Git.FilePath import Git.FilePath
import Git.Command
import Git.Types
{- Runs an action that reads from the database. {- Runs an action that reads from the database.
- -
@ -124,12 +128,16 @@ openDb createdb _ = catchPermissionDenied permerr $ withExclusiveLock gitAnnexKe
open db open db
(False, False) -> return DbUnavailable (False, False) -> return DbUnavailable
where where
open db = liftIO $ DbOpen <$> H.openDbQueue H.MultiWriter db SQL.containedTable
-- If permissions don't allow opening the database, treat it as if -- If permissions don't allow opening the database, treat it as if
-- it does not exist. -- it does not exist.
permerr e = case createdb of permerr e = case createdb of
False -> return DbUnavailable False -> return DbUnavailable
True -> throwM e True -> throwM e
open db = do
h <- liftIO $ H.openDbQueue H.MultiWriter db SQL.containedTable
reconcileStaged (SQL.WriteHandle h)
return $ DbOpen h
{- Closes the database if it was open. Any writes will be flushed to it. {- Closes the database if it was open. Any writes will be flushed to it.
- -
@ -172,3 +180,66 @@ getInodeCaches = runReaderIO . SQL.getInodeCaches . toIKey
removeInodeCaches :: Key -> Annex () removeInodeCaches :: Key -> Annex ()
removeInodeCaches = runWriterIO . SQL.removeInodeCaches . toIKey removeInodeCaches = runWriterIO . SQL.removeInodeCaches . toIKey
{- Looks at staged changes to find when unlocked files are copied/moved,
 - and updates associated files in the keys database.
 -
 - Since staged changes can be dropped later, does not remove any
 - associated files; only adds new associated files.
 -
 - This needs to be run before querying the keys database so that
 - information is consistent with the state of the repository.
 -
 - Does nothing unless versionUsesKeysDatabase is true.
 -
 - The database queue is flushed only when at least one associated file
 - was actually queued by this run.
 -
 - TODO: To avoid unnecessary work, stat the index file, and do nothing
 - when it has not changed since the last time this was run. (Not yet
 - implemented; currently git diff is run every time.)
 -}
reconcileStaged :: SQL.WriteHandle -> Annex ()
reconcileStaged h@(SQL.WriteHandle qh) = whenM versionUsesKeysDatabase $ do
    (l, cleanup) <- inRepo $ pipeNullSplit diff
    changed <- go l False
    void $ liftIO cleanup
    -- Flush database changes immediately so other processes can see them.
    when changed $
        liftIO $ H.flushDbQueue qh
  where
    diff =
        -- Avoid using external diff command, which would be slow.
        -- (The -G option may make it be used otherwise.)
        [ Param "-c", Param "diff.external="
        , Param "diff"
        , Param "--cached"
        , Param "--raw"
        , Param "-z"
        , Param "--abbrev=40"
        -- Optimization: Only find pointer files. This is not
        -- perfect. A file could start with this and not be a
        -- pointer file. And a pointer file that is replaced with
        -- a non-pointer file will match this.
        , Param $ "-G^" ++ toInternalGitPath (pathSeparator:objectDir)
        -- Don't include files that were deleted, because this only
        -- wants to update information for files that are present
        -- in the index.
        , Param "--diff-filter=AMUT"
        -- Disable rename detection.
        , Param "--no-renames"
        -- Avoid other complications.
        , Param "--ignore-submodules=all"
        , Param "--no-ext-diff"
        ]

    -- Walks the NUL-split diff output two fields at a time: an info
    -- field (modes, shas, change type) followed by the file name.
    -- The accumulator records whether anything was queued to the
    -- database so far.
    go (info:file:rest) changed = case words info of
        ((':':_srcmode):dstmode:_srcsha:dstsha:_change:[])
            -- Only want files, not symlinks
            | dstmode /= fmtTreeItemType TreeSymlink ->
                catKey (Ref dstsha) >>= \case
                    -- No key for the staged content (presumably
                    -- not a pointer file after all), so nothing
                    -- is queued and the flag is left alone.
                    Nothing -> go rest changed
                    Just k -> do
                        liftIO $ SQL.addAssociatedFileFast
                            (toIKey k)
                            (asTopFilePath file)
                            h
                        go rest True
            | otherwise -> go rest changed
        _ -> return changed -- parse failed
    -- End of output, or a trailing field with no file name.
    go _ changed = return changed

View file

@ -2,15 +2,18 @@ git-annex should use smudge/clean filters. v6 mode
### August sprint todo list ### August sprint todo list
* Reconcile staged changes into the associated files database, whenever * Avoid running reconcileStaged when the index has not changed since last
the database is queried. This is needed to handle eg: time.
git add largefile * If `git mv` of an unlocked file is run at the same time as `git annex drop`,
git mv largefile othername and when git-annex starts up, the mv has not happened yet, but once it
git annex move othername --to foo wants to update the associated file to drop the content, the mv has
# fails to drop content from associated file othername, happened, then the content will be left in the working tree despite
# because it doesn't know it has that name git-annex having said it dropped it.
# git commit clears up this mess
I think git-annex fsck also won't notice this problem, even though,
once the mv is complete, git-annex does notice the change in the index
and updates the associated files.
* Checking out a different branch causes git to smudge all changed files, * Checking out a different branch causes git to smudge all changed files,
and write their content. This does not honor annex.thin. A warning and write their content. This does not honor annex.thin. A warning