migrate --update gets keys

The git log is outputting the diff, but this only looks at the new
files. When we have a new file, we can get the old filename by just
replacing "new" with "old". And then use branchFileRef to refer to it
allows catting the old key.

While this does have to skip past the old files in the diff, it's still
faster than calling git diff separately.

Sponsored-by: Nicholas Golder-Manning on Patreon
This commit is contained in:
Joey Hess 2023-12-07 17:03:58 -04:00
parent f1ce15036f
commit 7c7c9912c1
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 38 additions and 22 deletions

View file

@ -165,7 +165,7 @@ startAll o outputter = do
void $ liftIO cleanup
stop
{- Displays changes made. Only works when all the RefChanges are for the
{- Displays changes made. Only works when all the LoggedFileChanges are for the
- same key. The method is to compare each value with the value
- after it in the list, which is the old version of the value.
-
@ -179,7 +179,7 @@ startAll o outputter = do
- This also generates subtly better output when the git-annex branch
- got diverged.
-}
showLogIncremental :: Outputter -> [RefChange Key] -> Annex ()
showLogIncremental :: Outputter -> [LoggedFileChange Key] -> Annex ()
showLogIncremental outputter ps = do
sets <- mapM (getset newref) ps
previous <- maybe (return genesis) (getset oldref) (lastMaybe ps)
@ -196,7 +196,7 @@ showLogIncremental outputter ps = do
{- Displays changes made. Streams, and can display changes affecting
- different keys, but does twice as much reading of logged values
- as showLogIncremental. -}
showLog :: (ActionItem -> Outputter) -> [RefChange Key] -> Annex ()
showLog :: (ActionItem -> Outputter) -> [LoggedFileChange Key] -> Annex ()
showLog outputter cs = forM_ cs $ \c -> do
let ai = mkActionItem (changed c)
new <- S.fromList <$> loggedLocationsRef (newref c)
@ -279,7 +279,7 @@ compareChanges format changes = concatMap diff changes
- once the location log file is gone avoids it checking all the way back
- to commit 0 to see if it used to exist, so generally speeds things up a
- *lot* for newish files. -}
getKeyLog :: Key -> [CommandParam] -> Annex ([RefChange Key], IO Bool)
getKeyLog :: Key -> [CommandParam] -> Annex ([LoggedFileChange Key], IO Bool)
getKeyLog key os = do
top <- fromRepo Git.repoPath
p <- liftIO $ relPathCwdToFile top
@ -287,7 +287,7 @@ getKeyLog key os = do
let logfile = p P.</> locationLogFile config key
getGitLogAnnex [fromRawFilePath logfile] (Param "--remove-empty" : os)
getGitLogAnnex :: [FilePath] -> [CommandParam] -> Annex ([RefChange Key], IO Bool)
getGitLogAnnex :: [FilePath] -> [CommandParam] -> Annex ([LoggedFileChange Key], IO Bool)
getGitLogAnnex fs os = do
config <- Annex.getGitConfig
let fileselector = \_sha f ->

View file

@ -16,7 +16,7 @@ import Data.Time
import Data.Time.Clock.POSIX
-- A change made to a file.
data RefChange t = RefChange
data LoggedFileChange t = LoggedFileChange
{ changetime :: POSIXTime
, changed :: t
, changedfile :: FilePath
@ -25,7 +25,9 @@ data RefChange t = RefChange
}
deriving (Show)
-- Get the git log. Note that the returned cleanup action should only be
-- Get the git log of changes to files.
--
-- Note that the returned cleanup action should only be
-- run after processing the returned list.
getGitLog
:: Ref
@ -34,7 +36,7 @@ getGitLog
-> [CommandParam]
-> (Sha -> FilePath -> Maybe t)
-> Repo
-> IO ([RefChange t], IO Bool)
-> IO ([LoggedFileChange t], IO Bool)
getGitLog ref stopref fs os selector repo = do
(ls, cleanup) <- pipeNullSplit ps repo
return (parseGitRawLog selector (map decodeBL ls), cleanup)
@ -73,7 +75,7 @@ commitinfoFormat = "%H %ct"
--
-- The commitinfo is not included before all changelines, so
-- keep track of the most recently seen commitinfo.
parseGitRawLog :: (Ref -> FilePath -> Maybe t) -> [String] -> [RefChange t]
parseGitRawLog :: (Ref -> FilePath -> Maybe t) -> [String] -> [LoggedFileChange t]
parseGitRawLog selector = parse (deleteSha, epoch)
where
epoch = toEnum 0 :: POSIXTime
@ -90,7 +92,7 @@ parseGitRawLog selector = parse (deleteSha, epoch)
mrc = do
(old, new) <- parseRawChangeLine cl
v <- selector commitsha c2
return $ RefChange
return $ LoggedFileChange
{ changetime = ts
, changed = v
, changedfile = c2

View file

@ -50,14 +50,17 @@ import qualified Annex.Branch
import Git.Types
import Git.Tree
import Git.FilePath
import Git.Ref
import Git.Sha
import Git.Log
import Logs.File
import Logs
import Git.Log
import Git.Sha
import Annex.CatFile
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import Control.Concurrent.STM
import System.FilePath.ByteString as P
-- | What to use to record a migration. This should be the same Sha that is
-- used to as the content of the annexed file in the HEAD branch.
@ -128,16 +131,27 @@ streamNewDistributedMigrations a = do
-- and then deleted, and normally git log stops when a file
-- gets deleted.
([Param "--reverse", Param "--follow"])
(\sha _file -> Just sha)
forM_ l $ \c ->
unless (changed c `elem` toskip) $ do
moldkey <- catKey XXX
mnewkey <- catKey YYY
case (moldkey, mnewkey) of
(Just oldkey, Just newkey) -> a oldkey newkey
_ -> return ()
(\commit _file -> Just commit)
forM_ l (go toskip)
liftIO $ void cleanup
recordPerformedMigrations branchsha toskip
where
go toskip c
| newref c `elem` nullShas = return ()
| changed c `elem` toskip = return ()
| not ("/new/" `B.isInfixOf` newfile) = return ()
| otherwise =
catKey (newref c) >>= \case
Nothing -> return ()
Just newkey -> catKey oldfileref >>= \case
Nothing -> return ()
Just oldkey -> a oldkey newkey
where
newfile = toRawFilePath (changedfile c)
oldfile = migrationTreeGraftPoint
P.</> "old"
P.</> P.takeBaseName (fromInternalGitPath newfile)
oldfileref = branchFileRef (changed c) oldfile
getPerformedMigrations :: Annex (Maybe Sha, [Sha])
getPerformedMigrations = do
@ -161,12 +175,12 @@ getPerformedMigrations = do
-- commit. The list is additional commits that can be removed from the
-- log file if present.
recordPerformedMigrations :: Sha -> [Sha] -> Annex ()
recordPerformedMigrations sha toremove = do
recordPerformedMigrations commit toremove = do
logf <- fromRepo gitAnnexMigrationsLog
lckf <- fromRepo gitAnnexMigrationsLock
modifyLogFile logf lckf (update . drop 1)
where
update l = L.fromStrict (fromRef' sha) : filter (`notElem` toremove') l
update l = L.fromStrict (fromRef' commit) : filter (`notElem` toremove') l
toremove' = map (L.fromStrict . fromRef') toremove
-- Record that a migration was performed locally and committed.