migrate --update gets keys

The git log is outputting the diff, but this only looks at the new
files. When we have a new file, we can get the old filename by just
replacing "new" with "old". And then use branchFileRef to refer to it
allows catting the old key.

While this does have to skip past the old files in the diff, it's still
faster than calling git diff separately.

Sponsored-by: Nicholas Golder-Manning on Patreon
This commit is contained in:
Joey Hess 2023-12-07 17:03:58 -04:00
parent f1ce15036f
commit 7c7c9912c1
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 38 additions and 22 deletions

View file

@ -165,7 +165,7 @@ startAll o outputter = do
void $ liftIO cleanup void $ liftIO cleanup
stop stop
{- Displays changes made. Only works when all the RefChanges are for the {- Displays changes made. Only works when all the LoggedFileChanges are for the
- same key. The method is to compare each value with the value - same key. The method is to compare each value with the value
- after it in the list, which is the old version of the value. - after it in the list, which is the old version of the value.
- -
@ -179,7 +179,7 @@ startAll o outputter = do
- This also generates subtly better output when the git-annex branch - This also generates subtly better output when the git-annex branch
- got diverged. - got diverged.
-} -}
showLogIncremental :: Outputter -> [RefChange Key] -> Annex () showLogIncremental :: Outputter -> [LoggedFileChange Key] -> Annex ()
showLogIncremental outputter ps = do showLogIncremental outputter ps = do
sets <- mapM (getset newref) ps sets <- mapM (getset newref) ps
previous <- maybe (return genesis) (getset oldref) (lastMaybe ps) previous <- maybe (return genesis) (getset oldref) (lastMaybe ps)
@ -196,7 +196,7 @@ showLogIncremental outputter ps = do
{- Displays changes made. Streams, and can display changes affecting {- Displays changes made. Streams, and can display changes affecting
- different keys, but does twice as much reading of logged values - different keys, but does twice as much reading of logged values
- as showLogIncremental. -} - as showLogIncremental. -}
showLog :: (ActionItem -> Outputter) -> [RefChange Key] -> Annex () showLog :: (ActionItem -> Outputter) -> [LoggedFileChange Key] -> Annex ()
showLog outputter cs = forM_ cs $ \c -> do showLog outputter cs = forM_ cs $ \c -> do
let ai = mkActionItem (changed c) let ai = mkActionItem (changed c)
new <- S.fromList <$> loggedLocationsRef (newref c) new <- S.fromList <$> loggedLocationsRef (newref c)
@ -279,7 +279,7 @@ compareChanges format changes = concatMap diff changes
- once the location log file is gone avoids it checking all the way back - once the location log file is gone avoids it checking all the way back
- to commit 0 to see if it used to exist, so generally speeds things up a - to commit 0 to see if it used to exist, so generally speeds things up a
- *lot* for newish files. -} - *lot* for newish files. -}
getKeyLog :: Key -> [CommandParam] -> Annex ([RefChange Key], IO Bool) getKeyLog :: Key -> [CommandParam] -> Annex ([LoggedFileChange Key], IO Bool)
getKeyLog key os = do getKeyLog key os = do
top <- fromRepo Git.repoPath top <- fromRepo Git.repoPath
p <- liftIO $ relPathCwdToFile top p <- liftIO $ relPathCwdToFile top
@ -287,7 +287,7 @@ getKeyLog key os = do
let logfile = p P.</> locationLogFile config key let logfile = p P.</> locationLogFile config key
getGitLogAnnex [fromRawFilePath logfile] (Param "--remove-empty" : os) getGitLogAnnex [fromRawFilePath logfile] (Param "--remove-empty" : os)
getGitLogAnnex :: [FilePath] -> [CommandParam] -> Annex ([RefChange Key], IO Bool) getGitLogAnnex :: [FilePath] -> [CommandParam] -> Annex ([LoggedFileChange Key], IO Bool)
getGitLogAnnex fs os = do getGitLogAnnex fs os = do
config <- Annex.getGitConfig config <- Annex.getGitConfig
let fileselector = \_sha f -> let fileselector = \_sha f ->

View file

@ -16,7 +16,7 @@ import Data.Time
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
-- A change made to a file. -- A change made to a file.
data RefChange t = RefChange data LoggedFileChange t = LoggedFileChange
{ changetime :: POSIXTime { changetime :: POSIXTime
, changed :: t , changed :: t
, changedfile :: FilePath , changedfile :: FilePath
@ -25,7 +25,9 @@ data RefChange t = RefChange
} }
deriving (Show) deriving (Show)
-- Get the git log. Note that the returned cleanup action should only be -- Get the git log of changes to files.
--
-- Note that the returned cleanup action should only be
-- run after processing the returned list. -- run after processing the returned list.
getGitLog getGitLog
:: Ref :: Ref
@ -34,7 +36,7 @@ getGitLog
-> [CommandParam] -> [CommandParam]
-> (Sha -> FilePath -> Maybe t) -> (Sha -> FilePath -> Maybe t)
-> Repo -> Repo
-> IO ([RefChange t], IO Bool) -> IO ([LoggedFileChange t], IO Bool)
getGitLog ref stopref fs os selector repo = do getGitLog ref stopref fs os selector repo = do
(ls, cleanup) <- pipeNullSplit ps repo (ls, cleanup) <- pipeNullSplit ps repo
return (parseGitRawLog selector (map decodeBL ls), cleanup) return (parseGitRawLog selector (map decodeBL ls), cleanup)
@ -73,7 +75,7 @@ commitinfoFormat = "%H %ct"
-- --
-- The commitinfo is not included before all changelines, so -- The commitinfo is not included before all changelines, so
-- keep track of the most recently seen commitinfo. -- keep track of the most recently seen commitinfo.
parseGitRawLog :: (Ref -> FilePath -> Maybe t) -> [String] -> [RefChange t] parseGitRawLog :: (Ref -> FilePath -> Maybe t) -> [String] -> [LoggedFileChange t]
parseGitRawLog selector = parse (deleteSha, epoch) parseGitRawLog selector = parse (deleteSha, epoch)
where where
epoch = toEnum 0 :: POSIXTime epoch = toEnum 0 :: POSIXTime
@ -90,7 +92,7 @@ parseGitRawLog selector = parse (deleteSha, epoch)
mrc = do mrc = do
(old, new) <- parseRawChangeLine cl (old, new) <- parseRawChangeLine cl
v <- selector commitsha c2 v <- selector commitsha c2
return $ RefChange return $ LoggedFileChange
{ changetime = ts { changetime = ts
, changed = v , changed = v
, changedfile = c2 , changedfile = c2

View file

@ -50,14 +50,17 @@ import qualified Annex.Branch
import Git.Types import Git.Types
import Git.Tree import Git.Tree
import Git.FilePath import Git.FilePath
import Git.Ref
import Git.Sha
import Git.Log
import Logs.File import Logs.File
import Logs import Logs
import Git.Log
import Git.Sha
import Annex.CatFile import Annex.CatFile
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import Control.Concurrent.STM import Control.Concurrent.STM
import System.FilePath.ByteString as P
-- | What to use to record a migration. This should be the same Sha that is -- | What to use to record a migration. This should be the same Sha that is
-- used to as the content of the annexed file in the HEAD branch. -- used to as the content of the annexed file in the HEAD branch.
@ -128,16 +131,27 @@ streamNewDistributedMigrations a = do
-- and then deleted, and normally git log stops when a file -- and then deleted, and normally git log stops when a file
-- gets deleted. -- gets deleted.
([Param "--reverse", Param "--follow"]) ([Param "--reverse", Param "--follow"])
(\sha _file -> Just sha) (\commit _file -> Just commit)
forM_ l $ \c -> forM_ l (go toskip)
unless (changed c `elem` toskip) $ do
moldkey <- catKey XXX
mnewkey <- catKey YYY
case (moldkey, mnewkey) of
(Just oldkey, Just newkey) -> a oldkey newkey
_ -> return ()
liftIO $ void cleanup liftIO $ void cleanup
recordPerformedMigrations branchsha toskip recordPerformedMigrations branchsha toskip
where
go toskip c
| newref c `elem` nullShas = return ()
| changed c `elem` toskip = return ()
| not ("/new/" `B.isInfixOf` newfile) = return ()
| otherwise =
catKey (newref c) >>= \case
Nothing -> return ()
Just newkey -> catKey oldfileref >>= \case
Nothing -> return ()
Just oldkey -> a oldkey newkey
where
newfile = toRawFilePath (changedfile c)
oldfile = migrationTreeGraftPoint
P.</> "old"
P.</> P.takeBaseName (fromInternalGitPath newfile)
oldfileref = branchFileRef (changed c) oldfile
getPerformedMigrations :: Annex (Maybe Sha, [Sha]) getPerformedMigrations :: Annex (Maybe Sha, [Sha])
getPerformedMigrations = do getPerformedMigrations = do
@ -161,12 +175,12 @@ getPerformedMigrations = do
-- commit. The list is additional commits that can be removed from the -- commit. The list is additional commits that can be removed from the
-- log file if present. -- log file if present.
recordPerformedMigrations :: Sha -> [Sha] -> Annex () recordPerformedMigrations :: Sha -> [Sha] -> Annex ()
recordPerformedMigrations sha toremove = do recordPerformedMigrations commit toremove = do
logf <- fromRepo gitAnnexMigrationsLog logf <- fromRepo gitAnnexMigrationsLog
lckf <- fromRepo gitAnnexMigrationsLock lckf <- fromRepo gitAnnexMigrationsLock
modifyLogFile logf lckf (update . drop 1) modifyLogFile logf lckf (update . drop 1)
where where
update l = L.fromStrict (fromRef' sha) : filter (`notElem` toremove') l update l = L.fromStrict (fromRef' commit) : filter (`notElem` toremove') l
toremove' = map (L.fromStrict . fromRef') toremove toremove' = map (L.fromStrict . fromRef') toremove
-- Record that a migration was performed locally and committed. -- Record that a migration was performed locally and committed.