oldkeys: New command that lists the keys used by old versions of a file
The tricky thing about this turned out to be handling renames and reverts. For that, it has to make two passes over the git log, and to avoid buffering a possibly huge amount of logs in memory (i.e. the whole git log of an entire repository!), runs git log twice. (It might be possible to speed this up by asking git log to show a diff, and so avoid needing to use catKey.) Sponsored-By: Brock Spratlen on Patreon
This commit is contained in:
parent
6115bced71
commit
cf8b30c914
8 changed files with 178 additions and 2 deletions
|
@ -1,5 +1,6 @@
|
|||
git-annex (10.20230803) UNRELEASED; urgency=medium
|
||||
|
||||
* oldkeys: New command that lists the keys used by old versions of a file.
|
||||
* Fix behavior of onlyingroup.
|
||||
* info: Added --dead-repositories option.
|
||||
* Significant startup speed increase by avoiding repeatedly checking
|
||||
|
|
|
@ -115,6 +115,7 @@ import qualified Command.Direct
|
|||
import qualified Command.Indirect
|
||||
import qualified Command.Upgrade
|
||||
import qualified Command.Forget
|
||||
import qualified Command.OldKeys
|
||||
import qualified Command.P2P
|
||||
import qualified Command.Proxy
|
||||
import qualified Command.DiffDriver
|
||||
|
@ -236,6 +237,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
|
|||
, Command.Indirect.cmd
|
||||
, Command.Upgrade.cmd
|
||||
, Command.Forget.cmd
|
||||
, Command.OldKeys.cmd
|
||||
, Command.P2P.cmd
|
||||
, Command.Proxy.cmd
|
||||
, Command.DiffDriver.cmd
|
||||
|
|
115
Command/OldKeys.hs
Normal file
115
Command/OldKeys.hs
Normal file
|
@ -0,0 +1,115 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2023 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Command.OldKeys where
|
||||
|
||||
import Command
|
||||
import Git.Types
|
||||
import Git.Sha
|
||||
import qualified Git.Command
|
||||
import qualified Git.DiffTree as DiffTree
|
||||
import qualified Annex
|
||||
import Annex.CatFile
|
||||
import Utility.Terminal
|
||||
import qualified Utility.Format
|
||||
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.ByteString.Char8 as S8
|
||||
|
||||
-- The "oldkeys" command definition. It is placed in the query section,
-- takes paths as its parameters, and accepts the annexed-file matching
-- options. The parsed options are handed to seek.
-- NOTE(review): noCommit presumably marks the command as one that never
-- needs to commit to the git-annex branch -- confirm against Command.
cmd :: Command
|
||||
cmd = noCommit $ withAnnexOptions [annexedMatchingOptions] $
|
||||
command "oldkeys" SectionQuery
|
||||
"list keys used for old versions of files"
|
||||
paramPaths (seek <$$> optParser)
|
||||
|
||||
-- Options accepted by the oldkeys command.
data OldKeysOptions = OldKeysOptions
|
||||
-- The command's positional parameters; seek appends them to the
-- git log command line as File parameters.
{ fileOptions :: CmdParams
|
||||
}
|
||||
|
||||
-- Option parser for oldkeys. The only thing parsed is the list of
-- positional parameters, described to the user by desc.
optParser :: CmdParamsDesc -> Parser OldKeysOptions
|
||||
optParser desc = OldKeysOptions
|
||||
<$> cmdParams desc
|
||||
|
||||
-- Finds old keys by walking the raw diffs output by git log for the
-- requested paths. The first pass collects the set of keys that must
-- not be reported (see getcurrentkeys below); the second pass runs
-- start on each key found on the source side of a diff entry that is
-- not in that set.
seek :: OldKeysOptions -> CommandSeek
|
||||
seek o = do
|
||||
isterminal <- liftIO $ checkIsTerminal stdout
|
||||
-- Get the diff twice and make separate passes over it
|
||||
-- to avoid needing to cache it all in memory.
|
||||
currentkeys <- withdiff getcurrentkeys
|
||||
-- Second pass: skip entries with a null source sha, look up the key
-- for the source sha, and report it unless it is a current key.
-- Source shas that catKey does not resolve to a key are ignored.
withdiff $ \l ->
|
||||
forM_ l $ \i ->
|
||||
when (DiffTree.srcsha i `notElem` nullShas) $ do
|
||||
catKey (DiffTree.srcsha i) >>= \case
|
||||
Just key | S.notMember key currentkeys ->
|
||||
commandAction $ start isterminal key
|
||||
_ -> return ()
|
||||
where
|
||||
-- Runs git log with the parameters in ps, parses its output as a
-- raw diff, and passes the entries whose source mode is a file mode
-- to the action. The cleanup action returned by pipeNullSplit is run
-- after the action finishes, and its result is discarded.
-- NOTE(review): the filter looks only at the source mode, so entries
-- with no source side (eg newly added files) may never reach
-- getcurrentkeys, depending on what toTreeItemType returns for their
-- mode -- confirm this is intended.
withdiff a = do
|
||||
(output, cleanup) <- Annex.inRepo $
|
||||
Git.Command.pipeNullSplit ps
|
||||
let l = filter (isfilemode . DiffTree.srcmode)
|
||||
(DiffTree.parseDiffRaw output)
|
||||
r <- a l
|
||||
liftIO $ void cleanup
|
||||
return r
|
||||
|
||||
-- Parameters for git log, followed by the paths given by the user.
ps =
|
||||
[ Param "log"
|
||||
, Param "-z"
|
||||
-- Don't convert pointer files.
|
||||
, Param "--no-textconv"
|
||||
-- Don't abbreviate hashes.
|
||||
, Param "--no-abbrev"
|
||||
-- Don't show renames.
|
||||
, Param "--no-renames"
|
||||
-- Output the raw diff.
|
||||
, Param "--raw"
|
||||
-- Avoid outputting anything except for the raw diff.
|
||||
, Param "--pretty="
|
||||
] ++ map File (fileOptions o)
|
||||
|
||||
-- True when the mode is that of a regular file, an executable, or a
-- symlink; False for every other mode.
isfilemode m = case toTreeItemType m of
|
||||
Just TreeFile -> True
|
||||
Just TreeExecutable -> True
|
||||
Just TreeSymlink -> True
|
||||
_ -> False
|
||||
|
||||
-- Accumulate the most recent key used for each file
|
||||
-- (that is not deleted).
|
||||
-- Those keys should never be listed as old keys, even if
|
||||
-- some other file did have them as an old key. This avoids
|
||||
-- surprising behavior for renames and reverts.
|
||||
getcurrentkeys l = getcurrentkeys' l M.empty
|
||||
-- The map goes from file to Maybe key. Nothing marks a file whose
-- first seen entry had a null destination sha; those are dropped
-- when the final set is built.
getcurrentkeys' [] m = pure $ S.fromList $ catMaybes $ M.elems m
|
||||
getcurrentkeys' (i:l) m
|
||||
-- The destination side is not a file; ignore the entry.
| not (isfilemode (DiffTree.dstmode i)) =
|
||||
getcurrentkeys' l m
|
||||
-- Null destination sha: record Nothing for the file, unless the
-- file already has a value in the map, which is kept.
| DiffTree.dstsha i `elem` nullShas =
|
||||
getcurrentkeys' l $
|
||||
M.insertWith (\_ prev -> prev)
|
||||
(DiffTree.file i)
|
||||
Nothing
|
||||
m
|
||||
-- Only the first entry seen for a file is used; later entries for
-- the same file are skipped without looking up their key.
| otherwise = case M.lookup (DiffTree.file i) m of
|
||||
Just _ -> getcurrentkeys' l m
|
||||
Nothing -> catKey (DiffTree.dstsha i) >>= \case
|
||||
Just key -> getcurrentkeys' l $
|
||||
M.insert
|
||||
(DiffTree.file i)
|
||||
(Just key)
|
||||
m
|
||||
-- No key for the destination sha; nothing is recorded, so an
-- older entry for the same file can still supply a key.
_ -> getcurrentkeys' l m
|
||||
|
||||
-- Outputs one key on its own line on stdout, in serialized form.
-- When stdout is a terminal, the serialized key is first passed
-- through encode_c; otherwise it is written as-is.
-- NOTE(review): encode_c (const False) presumably escapes only the
-- characters that encode_c always escapes, such as control characters,
-- to keep the terminal safe -- confirm against Utility.Format.
start :: IsTerminal -> Key -> CommandStart
|
||||
start (IsTerminal isterminal) key = startingCustomOutput key $ do
|
||||
liftIO $ S8.putStrLn $ if isterminal
|
||||
then Utility.Format.encode_c (const False) sk
|
||||
else sk
|
||||
-- There is nothing further to do for the key; report success.
next $ return True
|
||||
where
|
||||
sk = serializeKey' key
|
38
doc/git-annex-oldkeys.mdwn
Normal file
38
doc/git-annex-oldkeys.mdwn
Normal file
|
@ -0,0 +1,38 @@
|
|||
# NAME
|
||||
|
||||
git-annex oldkeys - list keys used for old versions of files
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
git annex oldkeys `[path ...]`
|
||||
|
||||
# DESCRIPTION
|
||||
|
||||
Lists keys used for old versions of the specified files or directories.
|
||||
|
||||
The output from this command can be piped into a command like
|
||||
`git-annex drop --batch-keys`.
|
||||
|
||||
The keys are listed in order from newest to oldest.
|
||||
|
||||
When listing old keys for a directory, it will include the most recent
|
||||
key used by deleted files (but not by renamed files).
|
||||
|
||||
Note that the listed keys may still be used by other files in the
|
||||
repository.
|
||||
|
||||
# OPTIONS
|
||||
|
||||
* Also the [[git-annex-common-options]](1) can be used.
|
||||
|
||||
# SEE ALSO
|
||||
|
||||
[[git-annex]](1)
|
||||
|
||||
[[git-annex-unused]](1)
|
||||
|
||||
# AUTHOR
|
||||
|
||||
Joey Hess <id@joeyh.name>
|
||||
|
||||
Warning: Automatically converted into a man page by mdwn2man. Edit with care.
|
|
@ -101,6 +101,8 @@ reflog.
|
|||
|
||||
[[git-annex-whereused]](1)
|
||||
|
||||
[[git-annex-oldkeys]](1)
|
||||
|
||||
# AUTHOR
|
||||
|
||||
Joey Hess <id@joeyh.name>
|
||||
|
|
|
@ -488,6 +488,10 @@ content from the key-value store.
|
|||
|
||||
See [[git-annex-log]](1) for details.
|
||||
|
||||
* `oldkeys [path ...]`
|
||||
|
||||
List keys used for old versions of files.
|
||||
|
||||
* `info [directory|file|remote|uuid ...]`
|
||||
|
||||
Displays statistics and other information for the specified item,
|
||||
|
|
|
@ -13,7 +13,8 @@ Or like this:
|
|||
# proceed to diff between old versions of the file
|
||||
# (although git-annex-diffdriver --get is another way to do this)
|
||||
|
||||
Or this to make every old version visible as files:
|
||||
Or this to make every old version visible as files to flip through in a
|
||||
slideshow:
|
||||
|
||||
n=0
|
||||
for k in $(git-annex oldkeys my.gif); do
|
||||
|
@ -24,4 +25,15 @@ Or this to make every old version visible as files:
|
|||
----
|
||||
|
||||
Is oldkeys the best name for this? `git-annex log` is already taken.
|
||||
--[[Joey]]
|
||||
|
||||
Since this would be implemented on top of `git log --raw`, it would
|
||||
be possible to support multiple files at once, or whole directories.
|
||||
|
||||
If an old key is the same as the current key, should it list the old key or
|
||||
not? If it did, then the move example above would move the current
|
||||
version of the file away. And there are tricky cases involving renames
|
||||
and reverts. So it seems that it ought to avoid
|
||||
ever listing a key currently used by the file(s) it is run on
|
||||
as an old key.
|
||||
|
||||
> [[done]] --[[Joey]]
|
||||
|
|
|
@ -101,6 +101,7 @@ Extra-Source-Files:
|
|||
doc/git-annex-move.mdwn
|
||||
doc/git-annex-multicast.mdwn
|
||||
doc/git-annex-numcopies.mdwn
|
||||
doc/git-annex-oldkeys.mdwn
|
||||
doc/git-annex-p2p.mdwn
|
||||
doc/git-annex-pre-commit.mdwn
|
||||
doc/git-annex-preferred-content.mdwn
|
||||
|
@ -776,6 +777,7 @@ Executable git-annex
|
|||
Command.NotifyChanges
|
||||
Command.NumCopies
|
||||
Command.MinCopies
|
||||
Command.OldKeys
|
||||
Command.P2P
|
||||
Command.P2PStdIO
|
||||
Command.PostReceive
|
||||
|
|
Loading…
Reference in a new issue