2015-12-15 19:34:28 +00:00
|
|
|
{- git-annex worktree files
|
|
|
|
-
|
2021-05-31 17:40:42 +00:00
|
|
|
- Copyright 2013-2021 Joey Hess <id@joeyh.name>
|
2015-12-15 19:34:28 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2015-12-15 19:34:28 +00:00
|
|
|
-}
|
|
|
|
|
|
|
|
module Annex.WorkTree where
|
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2021-05-31 17:40:42 +00:00
|
|
|
import qualified Annex
|
2015-12-15 19:34:28 +00:00
|
|
|
import Annex.Link
|
|
|
|
import Annex.CatFile
|
2016-10-17 19:19:47 +00:00
|
|
|
import Annex.Content
|
|
|
|
import Annex.ReplaceFile
|
2018-10-19 21:51:25 +00:00
|
|
|
import Annex.CurrentBranch
|
2018-12-11 17:05:03 +00:00
|
|
|
import Annex.InodeSentinal
|
|
|
|
import Utility.InodeCache
|
2016-10-17 19:19:47 +00:00
|
|
|
import Git.FilePath
|
2021-05-31 17:40:42 +00:00
|
|
|
import Git.CatFile
|
2016-10-17 18:58:33 +00:00
|
|
|
import qualified Git.Ref
|
|
|
|
import qualified Git.LsTree
|
|
|
|
import qualified Git.Types
|
|
|
|
import qualified Database.Keys
|
2019-11-21 18:31:12 +00:00
|
|
|
import Config
|
2019-12-09 17:49:05 +00:00
|
|
|
import qualified Utility.RawFilePath as R
|
2015-12-15 19:34:28 +00:00
|
|
|
|
2021-05-31 17:40:42 +00:00
|
|
|
import qualified Data.ByteString.Lazy as L
|
2020-06-11 19:40:13 +00:00
|
|
|
|
2015-12-30 18:23:31 +00:00
|
|
|
{- Finds the key of an annexed file in the work tree.
 -
 - The file is first checked with isAnnexLink; when that yields a key,
 - it is the result.
 -
 - An unlocked file has no link on disk, so otherwise a pointer to a key
 - is looked for in git (catKeyFile), provided the file exists on disk.
 -
 - When the file does not exist on disk, it may have been hidden by an
 - adjusted branch, so the pointer is looked for relative to the branch
 - returned by getCurrentBranch (catKeyFileHidden).
 -}
lookupKey :: RawFilePath -> Annex (Maybe Key)
lookupKey = lookupKey' viaGit
  where
    viaGit f = do
        onDisk <- liftIO (doesFileExist (fromRawFilePath f))
        if onDisk
            then catKeyFile f
            else getCurrentBranch >>= catKeyFileHidden f
|
2019-02-05 17:13:09 +00:00
|
|
|
|
2020-07-10 18:17:35 +00:00
|
|
|
{- Finds the key of an annexed file in the work tree, without
 - considering files that are not present on disk.
 -
 - The file is first checked with isAnnexLink. Failing that, a pointer to
 - a key is looked for in git (catKeyFile), but only when the file exists
 - on disk; otherwise the result is Nothing.
 -}
lookupKeyNotHidden :: RawFilePath -> Annex (Maybe Key)
lookupKeyNotHidden = lookupKey' presentOnly
  where
    presentOnly f = do
        exists <- liftIO . doesFileExist $ fromRawFilePath f
        if exists
            then catKeyFile f
            else pure Nothing
|
|
|
|
|
2020-07-10 18:17:35 +00:00
|
|
|
{- Shared implementation of key lookup.
 -
 - If isAnnexLink yields a key for the file, that key is returned.
 - Otherwise the supplied fallback action is run on the file, and its
 - result is returned.
 -}
lookupKey' :: (RawFilePath -> Annex (Maybe Key)) -> RawFilePath -> Annex (Maybe Key)
lookupKey' fallback f =
    maybe (fallback f) (return . Just) =<< isAnnexLink f
|
2015-12-15 19:34:28 +00:00
|
|
|
|
|
|
|
{- Wraps an action so that it only runs on files that are already
 - annexed. The key found by lookupKey is passed to the action after the
 - file; when no key is found, the result is Nothing. -}
whenAnnexed :: (RawFilePath -> Key -> Annex (Maybe a)) -> RawFilePath -> Annex (Maybe a)
whenAnnexed a file = lookupKey file >>= \case
    Just key -> a file key
    Nothing -> return Nothing
|
|
|
|
|
2019-11-26 19:27:22 +00:00
|
|
|
{- Looks up the key of a file with lookupKey, and runs the first action
 - on the key when one is found, or the second action otherwise. -}
ifAnnexed :: RawFilePath -> (Key -> Annex a) -> Annex a -> Annex a
ifAnnexed file yes no = lookupKey file >>= \case
    Just key -> yes key
    Nothing -> no
|
2016-10-17 18:58:33 +00:00
|
|
|
|
include locked files in the keys database associated files
Before only unlocked files were included.
The initial scan now scans for locked as well as unlocked files. This
does mean it gets a little bit slower, although I optimised it as well
as I think it can be.
reconcileStaged changed to diff from the current index to the tree of
the previous index. This lets it handle deletions as well, removing
associated files for both locked and unlocked files, which did not
always happen before.
On upgrade, there will be no recorded previous tree, so it will diff
from the empty tree to current index, and so will fully populate the
associated files, as well as removing any stale associated files
that were present due to them not being removed before.
reconcileStaged now does a bit more work. Most of the time, this will
just be due to running more often, after some change is made to the
index, and since there will be few changes since the last time, it will
not be a noticeable overhead. What may turn out to be a noticeable
slowdown is after changing to a branch, it has to go through the diff
from the previous index to the new one, and if there are lots of
changes, that could take a long time. Also, after adding a lot of files,
or deleting a lot of files, or moving a large subdirectory, etc.
Command.Lock used removeAssociatedFile, but now that's wrong because a
newly locked file still needs to have its associated file tracked.
Command.Rekey used removeAssociatedFile when the file was unlocked.
It could remove it also when it's locked, but it is not really
necessary, because it changes the index, and so the next time git-annex
runs and accesses the keys db, reconcileStaged will run and update it.
There are probably several other places that use addAssociatedFile and
don't need to any more for similar reasons. But there's no harm in
keeping them, and it probably is a good idea to, if only to support
mixing this with older versions of git-annex.
However, mixing this and older versions does risk reconcileStaged not
running, if the older version already ran it on a given index state. So
it's not a good idea to mix versions. This problem could be dealt with
by changing the name of the gitAnnexKeysDbIndexCache, but that would
leave the old file dangling, or it would need to keep trying to remove
it.
2021-05-21 19:47:37 +00:00
|
|
|
{- Find all annexed files and update the keys database for them.
|
2016-10-17 18:58:33 +00:00
|
|
|
-
|
2021-06-08 13:11:24 +00:00
|
|
|
- This is expensive, and so normally the associated files are updated
|
|
|
|
- incrementally when changes are noticed. So, this only needs to be done
|
|
|
|
- when initializing/upgrading a repository.
|
|
|
|
-
|
include locked files in the keys database associated files
Before only unlocked files were included.
The initial scan now scans for locked as well as unlocked files. This
does mean it gets a little bit slower, although I optimised it as well
as I think it can be.
reconcileStaged changed to diff from the current index to the tree of
the previous index. This lets it handle deletions as well, removing
associated files for both locked and unlocked files, which did not
always happen before.
On upgrade, there will be no recorded previous tree, so it will diff
from the empty tree to current index, and so will fully populate the
associated files, as well as removing any stale associated files
that were present due to them not being removed before.
reconcileStaged now does a bit more work. Most of the time, this will
just be due to running more often, after some change is made to the
index, and since there will be few changes since the last time, it will
not be a noticeable overhead. What may turn out to be a noticeable
slowdown is after changing to a branch, it has to go through the diff
from the previous index to the new one, and if there are lots of
changes, that could take a long time. Also, after adding a lot of files,
or deleting a lot of files, or moving a large subdirectory, etc.
Command.Lock used removeAssociatedFile, but now that's wrong because a
newly locked file still needs to have its associated file tracked.
Command.Rekey used removeAssociatedFile when the file was unlocked.
It could remove it also when it's locked, but it is not really
necessary, because it changes the index, and so the next time git-annex
runs and accesses the keys db, reconcileStaged will run and update it.
There are probably several other places that use addAssociatedFile and
don't need to any more for similar reasons. But there's no harm in
keeping them, and it probably is a good idea to, if only to support
mixing this with older versions of git-annex.
However, mixing this and older versions does risk reconcileStaged not
running, if the older version already ran it on a given index state. So
it's not a good idea to mix versions. This problem could be dealt with
by changing the name of the gitAnnexKeysDbIndexCache, but that would
leave the old file dangling, or it would need to keep trying to remove
it.
2021-05-21 19:47:37 +00:00
|
|
|
- Also, the content for an unlocked file may already be present as
|
2019-11-05 16:41:15 +00:00
|
|
|
- an annex object. If so, populate the pointer file with it.
|
|
|
|
- But if the worktree file does not have a pointer file's content, it is left
|
|
|
|
- as-is.
|
2016-10-17 18:58:33 +00:00
|
|
|
-}
|
include locked files in the keys database associated files
Before only unlocked files were included.
The initial scan now scans for locked as well as unlocked files. This
does mean it gets a little bit slower, although I optimised it as well
as I think it can be.
reconcileStaged changed to diff from the current index to the tree of
the previous index. This lets it handle deletions as well, removing
associated files for both locked and unlocked files, which did not
always happen before.
On upgrade, there will be no recorded previous tree, so it will diff
from the empty tree to current index, and so will fully populate the
associated files, as well as removing any stale associated files
that were present due to them not being removed before.
reconcileStaged now does a bit more work. Most of the time, this will
just be due to running more often, after some change is made to the
index, and since there will be few changes since the last time, it will
not be a noticeable overhead. What may turn out to be a noticeable
slowdown is after changing to a branch, it has to go through the diff
from the previous index to the new one, and if there are lots of
changes, that could take a long time. Also, after adding a lot of files,
or deleting a lot of files, or moving a large subdirectory, etc.
Command.Lock used removeAssociatedFile, but now that's wrong because a
newly locked file still needs to have its associated file tracked.
Command.Rekey used removeAssociatedFile when the file was unlocked.
It could remove it also when it's locked, but it is not really
necessary, because it changes the index, and so the next time git-annex
runs and accesses the keys db, reconcileStaged will run and update it.
There are probably several other places that use addAssociatedFile and
don't need to any more for similar reasons. But there's no harm in
keeping them, and it probably is a good idea to, if only to support
mixing this with older versions of git-annex.
However, mixing this and older versions does risk reconcileStaged not
running, if the older version already ran it on a given index state. So
it's not a good idea to mix versions. This problem could be dealt with
by changing the name of the gitAnnexKeysDbIndexCache, but that would
leave the old file dangling, or it would need to keep trying to remove
it.
2021-05-21 19:47:37 +00:00
|
|
|
scanAnnexedFiles :: Annex ()
scanAnnexedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $ do
    -- This gets the keys database populated with all annexed files,
    -- by running Database.Keys.reconcileStaged.
    Database.Keys.runWriter (const noop)
    -- The above tries to populate pointer files, but one thing it
    -- is not able to handle is populating a pointer file when the
    -- annex object file already exists, but its inode is not yet
    -- cached. So, the rest of this makes another pass over the
    -- tree to do that.
    g <- Annex.gitRepo
    (l, cleanup) <- inRepo $ Git.LsTree.lsTree
        Git.LsTree.LsTreeRecursive
        (Git.LsTree.LsTreeLong True)
        Git.Ref.headRef
    catObjectStreamLsTree l want g go
    liftIO $ void cleanup
  where
    -- Want to process symlinks, and regular files. Each wanted item is
    -- paired with a flag that is True for regular (and executable)
    -- files and False for symlinks.
    want i = case Git.Types.toTreeItemType (Git.LsTree.mode i) of
        Just Git.Types.TreeSymlink -> Just (i, False)
        Just Git.Types.TreeFile -> checkfilesize i
        Just Git.Types.TreeExecutable -> checkfilesize i
        _ -> Nothing

    -- Avoid processing files that are too large to be pointer files.
    -- Items with no recorded size are skipped as well.
    checkfilesize i = case Git.LsTree.size i of
        Just n | n < maxPointerSz -> Just (i, True)
        _ -> Nothing

    -- Consumes the stream of (item, content) pairs until it is exhausted.
    go getnext = liftIO getnext >>= \case
        Just ((i, isregfile), Just c) -> do
            -- Content that does not parse as a link target or
            -- pointer is not an annexed file; nothing to do for it.
            maybe noop (add i isregfile)
                (parseLinkTargetOrPointer (L.toStrict c))
            go getnext
        -- The stream yielded no content for this item (presumably
        -- the object is missing from the repository). Skip it
        -- rather than ending the scan, so that the remaining items
        -- still get processed.
        Just (_, Nothing) -> go getnext
        -- End of the stream.
        Nothing -> return ()

    -- Populates the work tree file of a tree item, when it is a regular
    -- file and inAnnex reports the key's content as available.
    add i isregfile k = do
        let tf = Git.LsTree.file i
        whenM (pure isregfile <&&> inAnnex k) $ do
            f <- fromRepo $ fromTopFilePath tf
            -- Only touch the work tree file when it is currently a
            -- pointer file for this same key; anything else is left
            -- as-is.
            liftIO (isPointerFile f) >>= \case
                Just k' | k' == k -> do
                    -- Mode of the existing work tree file, if it can
                    -- be determined.
                    destmode <- liftIO $ catchMaybeIO $
                        fileMode <$> R.getFileStatus f
                    ic <- replaceWorkTreeFile (fromRawFilePath f) $ \tmp -> do
                        let tmp' = toRawFilePath tmp
                        linkFromAnnex k tmp' destmode >>= \case
                            -- Populated; generate the inode cache of
                            -- the new file so it can be restaged.
                            LinkAnnexOk ->
                                withTSDelta (liftIO . genInodeCache tmp')
                            LinkAnnexNoop -> return Nothing
                            -- Could not populate; write a pointer
                            -- file to the temp file instead.
                            LinkAnnexFailed -> liftIO $ do
                                writePointerFile tmp' k destmode
                                return Nothing
                    -- Restage only when an inode cache was produced.
                    maybe noop (restagePointerFile (Restage True) f) ic
                _ -> noop
|