started migrate --update
This is most of the way there, but not quite working. The layout of migrate.tree/ needs to be changed to follow this approach. git log will list all the files in tree order, so the new layout needs to alternate old and new keys. Can that be done? git may not document tree order, or may not preserve it here. Alternatively, change to using git log --format=raw and extract the tree header from that, then use git diff --raw $tree:migrate.tree/old $tree:migrate.tree/new That will be a little more expensive, but only when there are lots of migrations. Sponsored-by: Joshua Antonishen on Patreon
This commit is contained in:
parent
d06aee7ce0
commit
f1ce15036f
9 changed files with 172 additions and 38 deletions
|
@ -14,6 +14,7 @@ module Annex.Branch (
|
|||
hasSibling,
|
||||
siblingBranches,
|
||||
create,
|
||||
getBranch,
|
||||
UpdateMade(..),
|
||||
update,
|
||||
forceUpdate,
|
||||
|
@ -120,7 +121,7 @@ siblingBranches = inRepo $ Git.Ref.matchingUniq [name]
|
|||
create :: Annex ()
|
||||
create = void getBranch
|
||||
|
||||
{- Returns the ref of the branch, creating it first if necessary. -}
|
||||
{- Returns the sha of the branch, creating it first if necessary. -}
|
||||
getBranch :: Annex Git.Ref
|
||||
getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha
|
||||
where
|
||||
|
@ -920,10 +921,14 @@ getMergedRefs' = do
|
|||
{- Grafts a treeish into the branch at the specified location,
|
||||
- and then removes it. This ensures that the treeish won't get garbage
|
||||
- collected, and will always be available as long as the git-annex branch
|
||||
- is available. -}
|
||||
rememberTreeish :: Git.Ref -> TopFilePath -> Annex ()
|
||||
rememberTreeish treeish graftpoint = lockJournal $ rememberTreeishLocked treeish graftpoint
|
||||
rememberTreeishLocked :: Git.Ref -> TopFilePath -> JournalLocked -> Annex ()
|
||||
- is available.
|
||||
-
|
||||
- Returns the sha of the git commit made to the git-annex branch.
|
||||
-}
|
||||
rememberTreeish :: Git.Ref -> TopFilePath -> Annex Git.Sha
|
||||
rememberTreeish treeish graftpoint = lockJournal $
|
||||
rememberTreeishLocked treeish graftpoint
|
||||
rememberTreeishLocked :: Git.Ref -> TopFilePath -> JournalLocked -> Annex Git.Sha
|
||||
rememberTreeishLocked treeish graftpoint jl = do
|
||||
branchref <- getBranch
|
||||
updateIndex jl branchref
|
||||
|
@ -940,6 +945,7 @@ rememberTreeishLocked treeish graftpoint jl = do
|
|||
-- and the index was updated to that above, so it's safe to
|
||||
-- say that the index contains c'.
|
||||
setIndexSha c'
|
||||
return c'
|
||||
|
||||
{- Runs an action on the content of selected files from the branch.
|
||||
- This is much faster than reading the content of each file in turn,
|
||||
|
|
|
@ -56,6 +56,8 @@ module Annex.Locations (
|
|||
gitAnnexAdjustedBranchUpdateLock,
|
||||
gitAnnexMigrateLog,
|
||||
gitAnnexMigrateLock,
|
||||
gitAnnexMigrationsLog,
|
||||
gitAnnexMigrationsLock,
|
||||
gitAnnexMoveLog,
|
||||
gitAnnexMoveLock,
|
||||
gitAnnexExportDir,
|
||||
|
@ -416,6 +418,13 @@ gitAnnexMigrateLog r = gitAnnexDir r P.</> "migrate.log"
|
|||
gitAnnexMigrateLock :: Git.Repo -> RawFilePath
|
||||
gitAnnexMigrateLock r = gitAnnexDir r P.</> "migrate.lck"
|
||||
|
||||
{- .git/annex/migrations.log is used to log committed migrations. -}
|
||||
gitAnnexMigrationsLog :: Git.Repo -> RawFilePath
|
||||
gitAnnexMigrationsLog r = gitAnnexDir r P.</> "migrations.log"
|
||||
|
||||
gitAnnexMigrationsLock :: Git.Repo -> RawFilePath
|
||||
gitAnnexMigrationsLock r = gitAnnexDir r P.</> "migrations.lck"
|
||||
|
||||
{- .git/annex/move.log is used to log moves that are in progress,
|
||||
- to better support resuming an interrupted move. -}
|
||||
gitAnnexMoveLog :: Git.Repo -> RawFilePath
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
git-annex (10.20231130) UNRELEASED; urgency=medium
|
||||
|
||||
* migrate: Support distributed migrations by recording each migration,
|
||||
and adding a --update option that updates the local repository.
|
||||
* Make git-annex get/copy/move --from foo override configuration of
|
||||
remote.foo.annex-ignore, as documented.
|
||||
* Support git-annex copy/move --from-anywhere --to remote.
|
||||
|
|
|
@ -290,8 +290,9 @@ getKeyLog key os = do
|
|||
getGitLogAnnex :: [FilePath] -> [CommandParam] -> Annex ([RefChange Key], IO Bool)
|
||||
getGitLogAnnex fs os = do
|
||||
config <- Annex.getGitConfig
|
||||
let fileselector = locationLogFileKey config . toRawFilePath
|
||||
inRepo $ getGitLog Annex.Branch.fullname fs os fileselector
|
||||
let fileselector = \_sha f ->
|
||||
locationLogFileKey config (toRawFilePath f)
|
||||
inRepo $ getGitLog Annex.Branch.fullname Nothing fs os fileselector
|
||||
|
||||
showTimeStamp :: TimeZone -> String -> POSIXTime -> String
|
||||
showTimeStamp zone format = formatTime defaultTimeLocale format
|
||||
|
@ -321,13 +322,13 @@ sizeHistoryInfo mu o = do
|
|||
-- and to the trust log.
|
||||
getlog = do
|
||||
config <- Annex.getGitConfig
|
||||
let fileselector = \f -> let f' = toRawFilePath f in
|
||||
let fileselector = \_sha f -> let f' = toRawFilePath f in
|
||||
case locationLogFileKey config f' of
|
||||
Just k -> Just (Right k)
|
||||
Nothing
|
||||
| f' == trustLog -> Just (Left ())
|
||||
| otherwise -> Nothing
|
||||
inRepo $ getGitLog Annex.Branch.fullname []
|
||||
inRepo $ getGitLog Annex.Branch.fullname Nothing []
|
||||
[ Param "--date-order"
|
||||
, Param "--reverse"
|
||||
]
|
||||
|
|
|
@ -28,21 +28,31 @@ cmd = withAnnexOptions [backendOption, annexedMatchingOptions, jsonOptions] $
|
|||
|
||||
data MigrateOptions = MigrateOptions
|
||||
{ migrateThese :: CmdParams
|
||||
, updateOption :: Bool
|
||||
, removeSize :: Bool
|
||||
}
|
||||
|
||||
optParser :: CmdParamsDesc -> Parser MigrateOptions
|
||||
optParser desc = MigrateOptions
|
||||
<$> cmdParams desc
|
||||
<*> switch
|
||||
( long "update"
|
||||
<> help "update for migrations performed elsewhere"
|
||||
)
|
||||
<*> switch
|
||||
( long "remove-size"
|
||||
<> help "remove size field from keys"
|
||||
)
|
||||
|
||||
seek :: MigrateOptions -> CommandSeek
|
||||
seek o = do
|
||||
withFilesInGitAnnex ww seeker =<< workTreeItems ww (migrateThese o)
|
||||
commitMigration
|
||||
seek o
|
||||
| updateOption o = do
|
||||
unless (null (migrateThese o)) $
|
||||
error "Cannot combine --update with files to migrate."
|
||||
commandAction update
|
||||
| otherwise = do
|
||||
withFilesInGitAnnex ww seeker =<< workTreeItems ww (migrateThese o)
|
||||
commitMigration
|
||||
where
|
||||
ww = WarnUnmatchLsFiles "migrate"
|
||||
seeker = AnnexedFileSeeker
|
||||
|
@ -131,3 +141,10 @@ perform onlyremovesize o file oldkey oldkeyrec oldbackend newbackend = go =<< ge
|
|||
| removeSize o = alterKey k $ \kd -> kd { keySize = Nothing }
|
||||
| otherwise = k
|
||||
afile = AssociatedFile (Just file)
|
||||
|
||||
update :: CommandStart
|
||||
update = starting "migrate" (ActionItemOther Nothing) (SeekInput []) $ do
|
||||
streamNewDistributedMigrations $ \oldkey newkey -> do
|
||||
liftIO $ print ("migrate", oldkey, newkey)
|
||||
next $ return True
|
||||
|
||||
|
|
40
Git/Log.hs
40
Git/Log.hs
|
@ -10,6 +10,7 @@ module Git.Log where
|
|||
import Common
|
||||
import Git
|
||||
import Git.Command
|
||||
import Git.Sha
|
||||
|
||||
import Data.Time
|
||||
import Data.Time.Clock.POSIX
|
||||
|
@ -28,14 +29,15 @@ data RefChange t = RefChange
|
|||
-- run after processing the returned list.
|
||||
getGitLog
|
||||
:: Ref
|
||||
-> Maybe Ref
|
||||
-> [FilePath]
|
||||
-> [CommandParam]
|
||||
-> (FilePath -> Maybe t)
|
||||
-> (Sha -> FilePath -> Maybe t)
|
||||
-> Repo
|
||||
-> IO ([RefChange t], IO Bool)
|
||||
getGitLog ref fs os fileselector repo = do
|
||||
getGitLog ref stopref fs os selector repo = do
|
||||
(ls, cleanup) <- pipeNullSplit ps repo
|
||||
return (parseGitRawLog fileselector (map decodeBL ls), cleanup)
|
||||
return (parseGitRawLog selector (map decodeBL ls), cleanup)
|
||||
where
|
||||
ps =
|
||||
[ Param "log"
|
||||
|
@ -45,14 +47,16 @@ getGitLog ref fs os fileselector repo = do
|
|||
, Param "--no-abbrev"
|
||||
, Param "--no-renames"
|
||||
] ++ os ++
|
||||
[ Param (fromRef ref)
|
||||
[ case stopref of
|
||||
Just stopref' -> Param $
|
||||
fromRef stopref' <> ".." <> fromRef ref
|
||||
Nothing -> Param (fromRef ref)
|
||||
, Param "--"
|
||||
] ++ map Param fs
|
||||
|
||||
-- The commitinfo is the timestamp of the commit, followed by
|
||||
-- the commit hash and then the commit's parents, separated by spaces.
|
||||
-- The commitinfo is the commit hash followed by its timestamp.
|
||||
commitinfoFormat :: String
|
||||
commitinfoFormat = "%ct"
|
||||
commitinfoFormat = "%H %ct"
|
||||
|
||||
-- Parses chunked git log --raw output generated by getGitLog,
|
||||
-- which looks something like:
|
||||
|
@ -69,21 +73,23 @@ commitinfoFormat = "%ct"
|
|||
--
|
||||
-- The commitinfo is not included before all changelines, so
|
||||
-- keep track of the most recently seen commitinfo.
|
||||
parseGitRawLog :: (FilePath -> Maybe t) -> [String] -> [RefChange t]
|
||||
parseGitRawLog fileselector = parse epoch
|
||||
parseGitRawLog :: (Ref -> FilePath -> Maybe t) -> [String] -> [RefChange t]
|
||||
parseGitRawLog selector = parse (deleteSha, epoch)
|
||||
where
|
||||
epoch = toEnum 0 :: POSIXTime
|
||||
parse oldts ([]:rest) = parse oldts rest
|
||||
parse oldts (c1:c2:rest) = case mrc of
|
||||
Just rc -> rc : parse ts rest
|
||||
Nothing -> parse ts (c2:rest)
|
||||
parse old ([]:rest) = parse old rest
|
||||
parse (oldcommitsha, oldts) (c1:c2:rest) = case mrc of
|
||||
Just rc -> rc : parse (commitsha, ts) rest
|
||||
Nothing -> parse (commitsha, ts) (c2:rest)
|
||||
where
|
||||
(ts, cl) = case separate (== '\n') c1 of
|
||||
(cl', []) -> (oldts, cl')
|
||||
(tss, cl') -> (parseTimeStamp tss, cl')
|
||||
(commitsha, ts, cl) = case separate (== '\n') c1 of
|
||||
(cl', []) -> (oldcommitsha, oldts, cl')
|
||||
(ci, cl') -> case words ci of
|
||||
(css:tss:[]) -> (Ref (encodeBS css), parseTimeStamp tss, cl')
|
||||
_ -> (oldcommitsha, oldts, cl')
|
||||
mrc = do
|
||||
(old, new) <- parseRawChangeLine cl
|
||||
v <- fileselector c2
|
||||
v <- selector commitsha c2
|
||||
return $ RefChange
|
||||
{ changetime = ts
|
||||
, changed = v
|
||||
|
|
|
@ -78,7 +78,7 @@ recordExportBeginning remoteuuid newtree = do
|
|||
-- repository, the tree has to be kept available, even if it
|
||||
-- doesn't end up being merged into the master branch.
|
||||
recordExportTreeish :: Git.Ref -> Annex ()
|
||||
recordExportTreeish t =
|
||||
recordExportTreeish t = void $
|
||||
Annex.Branch.rememberTreeish t (asTopFilePath exportTreeGraftPoint)
|
||||
|
||||
-- | Record that an export to a special remote is under way.
|
||||
|
|
|
@ -16,7 +16,19 @@
|
|||
- The filenames used in the trees are not the original filenames, to avoid
|
||||
- running migrate in a throwaway branch unexpectedly recording that
|
||||
- branch's contents.
|
||||
-
|
||||
-
|
||||
- There are two local log files:
|
||||
- * migrate.log contains pairs of old and new keys, and is used while
|
||||
- performing a new migration, to build up a migration to commit.
|
||||
- This allows an interrupted migration to be resumed later.
|
||||
- * migrations.log has as its first line a commit to the git-annex branch
|
||||
- up to which all migrations have been performed locally (including any
|
||||
- migrations in parent commits). Or the first line may be a null sha when
|
||||
- this has not been done yet. The rest of the lines in the file
|
||||
- are commits that have been made for locally performed migrations,
|
||||
- but whose parent commits have not necessarily been checked for
|
||||
- migrations yet.
|
||||
-
|
||||
- Copyright 2023 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
|
@ -28,6 +40,7 @@ module Logs.Migrate (
|
|||
MigrationRecord(..),
|
||||
logMigration,
|
||||
commitMigration,
|
||||
streamNewDistributedMigrations,
|
||||
) where
|
||||
|
||||
import Annex.Common
|
||||
|
@ -39,6 +52,9 @@ import Git.Tree
|
|||
import Git.FilePath
|
||||
import Logs.File
|
||||
import Logs
|
||||
import Git.Log
|
||||
import Git.Sha
|
||||
import Annex.CatFile
|
||||
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import Control.Concurrent.STM
|
||||
|
@ -48,9 +64,6 @@ import Control.Concurrent.STM
|
|||
newtype MigrationRecord = MigrationRecord { fromMigrationRecord :: Git.Sha }
|
||||
|
||||
-- | Logs a migration from an old to a new key.
|
||||
--
|
||||
-- This writes to a log file, which can later be committed. That allows an
|
||||
-- interrupted migration to be resumed later.
|
||||
logMigration :: MigrationRecord -> MigrationRecord -> Annex ()
|
||||
logMigration old new = do
|
||||
logf <- fromRepo gitAnnexMigrateLog
|
||||
|
@ -97,5 +110,77 @@ commitMigration = do
|
|||
[ RecordedSubTree (asTopFilePath "old") oldt []
|
||||
, RecordedSubTree (asTopFilePath "new") newt []
|
||||
]
|
||||
Annex.Branch.rememberTreeish treesha
|
||||
commitsha <- Annex.Branch.rememberTreeish treesha
|
||||
(asTopFilePath migrationTreeGraftPoint)
|
||||
committedMigration commitsha
|
||||
|
||||
-- Streams distributed migrations from the git-annex branch that have not
|
||||
-- been performed here, and runs the provided action on each old and new
|
||||
-- key pair.
|
||||
streamNewDistributedMigrations :: (Key -> Key -> Annex ()) -> Annex ()
|
||||
streamNewDistributedMigrations a = do
|
||||
void Annex.Branch.update
|
||||
branchsha <- Annex.Branch.getBranch
|
||||
(stoppoint, toskip) <- getPerformedMigrations
|
||||
(l, cleanup) <- inRepo $ getGitLog branchsha stoppoint
|
||||
[fromRawFilePath migrationTreeGraftPoint]
|
||||
-- Need to follow because migrate.tree is grafted in
|
||||
-- and then deleted, and normally git log stops when a file
|
||||
-- gets deleted.
|
||||
([Param "--reverse", Param "--follow"])
|
||||
(\sha _file -> Just sha)
|
||||
forM_ l $ \c ->
|
||||
unless (changed c `elem` toskip) $ do
|
||||
moldkey <- catKey XXX
|
||||
mnewkey <- catKey YYY
|
||||
case (moldkey, mnewkey) of
|
||||
(Just oldkey, Just newkey) -> a oldkey newkey
|
||||
_ -> return ()
|
||||
liftIO $ void cleanup
|
||||
recordPerformedMigrations branchsha toskip
|
||||
|
||||
getPerformedMigrations :: Annex (Maybe Sha, [Sha])
|
||||
getPerformedMigrations = do
|
||||
logf <- fromRepo gitAnnexMigrationsLog
|
||||
lckf <- fromRepo gitAnnexMigrationsLock
|
||||
ls <- calcLogFile logf lckf [] (:)
|
||||
return $ case reverse ls of
|
||||
[] -> (Nothing, [])
|
||||
(stoppoint:toskip) ->
|
||||
let stoppoint' = conv stoppoint
|
||||
in
|
||||
( if stoppoint' `elem` nullShas
|
||||
then Nothing
|
||||
else Just stoppoint'
|
||||
, map conv toskip
|
||||
)
|
||||
where
|
||||
conv = Git.Ref . L.toStrict
|
||||
|
||||
-- Record locally that migrations have been performed up to the given
|
||||
-- commit. The list is additional commits that can be removed from the
|
||||
-- log file if present.
|
||||
recordPerformedMigrations :: Sha -> [Sha] -> Annex ()
|
||||
recordPerformedMigrations sha toremove = do
|
||||
logf <- fromRepo gitAnnexMigrationsLog
|
||||
lckf <- fromRepo gitAnnexMigrationsLock
|
||||
modifyLogFile logf lckf (update . drop 1)
|
||||
where
|
||||
update l = L.fromStrict (fromRef' sha) : filter (`notElem` toremove') l
|
||||
toremove' = map (L.fromStrict . fromRef') toremove
|
||||
|
||||
-- Record that a migration was performed locally and committed.
|
||||
-- Since committing a migration may result in parent migrations that have
|
||||
-- not yet been processed locally, that commit cannot be the first line of
|
||||
-- the log file, which is reserved for commits whose parents have also had
|
||||
-- their migrations handled. So if the log file does not exist or is empty,
|
||||
-- make the first line a null sha.
|
||||
committedMigration :: Sha -> Annex ()
|
||||
committedMigration commitsha = do
|
||||
logf <- fromRepo gitAnnexMigrationsLog
|
||||
lckf <- fromRepo gitAnnexMigrationsLock
|
||||
modifyLogFile logf lckf update
|
||||
where
|
||||
update [] = [conv deleteSha, conv commitsha]
|
||||
update logged = logged ++ [conv commitsha]
|
||||
conv = L.fromStrict . fromRef'
|
||||
|
|
|
@ -6,6 +6,8 @@ git-annex migrate - switch data to different backend
|
|||
|
||||
git annex migrate `[path ...]`
|
||||
|
||||
git annex migrate --update
|
||||
|
||||
# DESCRIPTION
|
||||
|
||||
Changes the specified annexed files to use the default key-value backend
|
||||
|
@ -19,11 +21,17 @@ Normally, nothing will be done to files already using the new backend.
|
|||
However, if a backend changes the information it uses to construct a key,
|
||||
this can also be used to migrate files to use the new key format.
|
||||
|
||||
When you have multiple repositories that each contain a copy of a file,
|
||||
it's best to run migrate in all of them.
|
||||
|
||||
# OPTIONS
|
||||
|
||||
* `--update`
|
||||
|
||||
This updates the local repository for migrations that were performed
|
||||
elsewhere.
|
||||
|
||||
Note that older versions of git-annex did not record migrations in a
|
||||
way that this can use. Migrations performed with those older versions
|
||||
had to be manually run in each clone of the repository.
|
||||
|
||||
* `--backend`
|
||||
|
||||
Specify the new key-value backend to use for migrated data.
|
||||
|
|
Loading…
Reference in a new issue