From 7c7c9912c1207786817e699c7d1c2a1529ca423d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Dec 2023 17:03:58 -0400 Subject: [PATCH] migrate --update gets keys The git log is outputting the diff, but this only looks at the new files. When we have a new file, we can get the old filename by just replacing "new" with "old". And then use branchFileRef to refer to it allows catting the old key. While this does have to skip past the old files in the diff, it's still faster than calling git diff separately. Sponsored-by: Nicholas Golder-Manning on Patreon --- Command/Log.hs | 10 +++++----- Git/Log.hs | 12 +++++++----- Logs/Migrate.hs | 38 ++++++++++++++++++++++++++------------ 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/Command/Log.hs b/Command/Log.hs index 54f56e1002..149b099dd5 100644 --- a/Command/Log.hs +++ b/Command/Log.hs @@ -165,7 +165,7 @@ startAll o outputter = do void $ liftIO cleanup stop -{- Displays changes made. Only works when all the RefChanges are for the +{- Displays changes made. Only works when all the LoggedFileChanges are for the - same key. The method is to compare each value with the value - after it in the list, which is the old version of the value. - @@ -179,7 +179,7 @@ startAll o outputter = do - This also generates subtly better output when the git-annex branch - got diverged. -} -showLogIncremental :: Outputter -> [RefChange Key] -> Annex () +showLogIncremental :: Outputter -> [LoggedFileChange Key] -> Annex () showLogIncremental outputter ps = do sets <- mapM (getset newref) ps previous <- maybe (return genesis) (getset oldref) (lastMaybe ps) @@ -196,7 +196,7 @@ showLogIncremental outputter ps = do {- Displays changes made. Streams, and can display changes affecting - different keys, but does twice as much reading of logged values - as showLogIncremental. -} -showLog :: (ActionItem -> Outputter) -> [RefChange Key] -> Annex () +showLog :: (ActionItem -> Outputter) -> [LoggedFileChange Key] -> Annex () showLog outputter cs = forM_ cs $ \c -> do let ai = mkActionItem (changed c) new <- S.fromList <$> loggedLocationsRef (newref c) @@ -279,7 +279,7 @@ compareChanges format changes = concatMap diff changes - once the location log file is gone avoids it checking all the way back - to commit 0 to see if it used to exist, so generally speeds things up a - *lot* for newish files. -} -getKeyLog :: Key -> [CommandParam] -> Annex ([RefChange Key], IO Bool) +getKeyLog :: Key -> [CommandParam] -> Annex ([LoggedFileChange Key], IO Bool) getKeyLog key os = do top <- fromRepo Git.repoPath p <- liftIO $ relPathCwdToFile top @@ -287,7 +287,7 @@ getKeyLog key os = do let logfile = p P. locationLogFile config key getGitLogAnnex [fromRawFilePath logfile] (Param "--remove-empty" : os) -getGitLogAnnex :: [FilePath] -> [CommandParam] -> Annex ([RefChange Key], IO Bool) +getGitLogAnnex :: [FilePath] -> [CommandParam] -> Annex ([LoggedFileChange Key], IO Bool) getGitLogAnnex fs os = do config <- Annex.getGitConfig let fileselector = \_sha f -> diff --git a/Git/Log.hs b/Git/Log.hs index 1b3c03b812..a3246d5102 100644 --- a/Git/Log.hs +++ b/Git/Log.hs @@ -16,7 +16,7 @@ import Data.Time import Data.Time.Clock.POSIX -- A change made to a file. -data RefChange t = RefChange +data LoggedFileChange t = LoggedFileChange { changetime :: POSIXTime , changed :: t , changedfile :: FilePath @@ -25,7 +25,9 @@ data RefChange t = RefChange } deriving (Show) --- Get the git log. Note that the returned cleanup action should only be +-- Get the git log of changes to files. +-- +-- Note that the returned cleanup action should only be -- run after processing the returned list. getGitLog :: Ref @@ -34,7 +36,7 @@ getGitLog -> [CommandParam] -> (Sha -> FilePath -> Maybe t) -> Repo - -> IO ([RefChange t], IO Bool) + -> IO ([LoggedFileChange t], IO Bool) getGitLog ref stopref fs os selector repo = do (ls, cleanup) <- pipeNullSplit ps repo return (parseGitRawLog selector (map decodeBL ls), cleanup) @@ -73,7 +75,7 @@ commitinfoFormat = "%H %ct" -- -- The commitinfo is not included before all changelines, so -- keep track of the most recently seen commitinfo. -parseGitRawLog :: (Ref -> FilePath -> Maybe t) -> [String] -> [RefChange t] +parseGitRawLog :: (Ref -> FilePath -> Maybe t) -> [String] -> [LoggedFileChange t] parseGitRawLog selector = parse (deleteSha, epoch) where epoch = toEnum 0 :: POSIXTime @@ -90,7 +92,7 @@ parseGitRawLog selector = parse (deleteSha, epoch) mrc = do (old, new) <- parseRawChangeLine cl v <- selector commitsha c2 - return $ RefChange + return $ LoggedFileChange { changetime = ts , changed = v , changedfile = c2 diff --git a/Logs/Migrate.hs b/Logs/Migrate.hs index f6444f1ee7..ad6409bb33 100644 --- a/Logs/Migrate.hs +++ b/Logs/Migrate.hs @@ -50,14 +50,17 @@ import qualified Annex.Branch import Git.Types import Git.Tree import Git.FilePath +import Git.Ref +import Git.Sha +import Git.Log import Logs.File import Logs -import Git.Log -import Git.Sha import Annex.CatFile +import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as L import Control.Concurrent.STM +import System.FilePath.ByteString as P -- | What to use to record a migration. This should be the same Sha that is -- used to as the content of the annexed file in the HEAD branch. @@ -128,16 +131,27 @@ streamNewDistributedMigrations a = do -- and then deleted, and normally git log stops when a file -- gets deleted. ([Param "--reverse", Param "--follow"]) - (\sha _file -> Just sha) - forM_ l $ \c -> - unless (changed c `elem` toskip) $ do - moldkey <- catKey XXX - mnewkey <- catKey YYY - case (moldkey, mnewkey) of - (Just oldkey, Just newkey) -> a oldkey newkey - _ -> return () + (\commit _file -> Just commit) + forM_ l (go toskip) liftIO $ void cleanup recordPerformedMigrations branchsha toskip + where + go toskip c + | newref c `elem` nullShas = return () + | changed c `elem` toskip = return () + | not ("/new/" `B.isInfixOf` newfile) = return () + | otherwise = + catKey (newref c) >>= \case + Nothing -> return () + Just newkey -> catKey oldfileref >>= \case + Nothing -> return () + Just oldkey -> a oldkey newkey + where + newfile = toRawFilePath (changedfile c) + oldfile = migrationTreeGraftPoint + P. "old" + P. P.takeBaseName (fromInternalGitPath newfile) + oldfileref = branchFileRef (changed c) oldfile getPerformedMigrations :: Annex (Maybe Sha, [Sha]) getPerformedMigrations = do @@ -161,12 +175,12 @@ getPerformedMigrations = do -- commit. The list is additional commits that can be removed from the -- log file if present. recordPerformedMigrations :: Sha -> [Sha] -> Annex () -recordPerformedMigrations sha toremove = do +recordPerformedMigrations commit toremove = do logf <- fromRepo gitAnnexMigrationsLog lckf <- fromRepo gitAnnexMigrationsLock modifyLogFile logf lckf (update . drop 1) where - update l = L.fromStrict (fromRef' sha) : filter (`notElem` toremove') l + update l = L.fromStrict (fromRef' commit) : filter (`notElem` toremove') l toremove' = map (L.fromStrict . fromRef') toremove -- Record that a migration was performed locally and committed.