diff --git a/CHANGELOG b/CHANGELOG index 97eb23e410..850d88ff4a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,6 +13,9 @@ git-annex (8.20210622) UNRELEASED; urgency=medium with youtube-dl or a special remote. In particular this can avoid falling back to raw download when youtube-dl is blocked for some reason. + * Fixed bug that interrupting git-annex repair (or assistant) while + it was fixing repository corruption would lose objects that were + contained in pack files. -- Joey Hess Mon, 21 Jun 2021 12:25:25 -0400 diff --git a/Git/Repair.hs b/Git/Repair.hs index d80ebd8b68..d52d70ba07 100644 --- a/Git/Repair.hs +++ b/Git/Repair.hs @@ -1,6 +1,6 @@ {- git repository recovery - - - Copyright 2013-2014 Joey Hess + - Copyright 2013-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -29,6 +29,7 @@ import Git.Sha import Git.Types import Git.Fsck import Git.Index +import Git.Env import qualified Git.Config as Config import qualified Git.Construct as Construct import qualified Git.LsTree as LsTree @@ -61,15 +62,14 @@ cleanCorruptObjects fsckresults r = do whenM (isMissing s r) $ removeLoose s -{- Explodes all pack files, and deletes them. +{- Explodes all pack files to loose objects, and deletes the pack files. - - - First moves all pack files to a temp dir, before unpacking them each in - - turn. + - git unpack-objects will not unpack objects from a pack file that are + - in the git repo. So, GIT_OBJECT_DIRECTORY is pointed to a temporary + - directory, and the loose objects then are moved into place, before + - deleting the pack files. - - - This is because unpack-objects will not unpack a pack file if it's in the - - git repo. - - - - Also, this prevents unpack-objects from possibly looking at corrupt + - Also, that prevents unpack-objects from possibly looking at corrupt - pack files to see if they contain an object, while unpacking a - non-corrupt pack file. -} @@ -78,18 +78,28 @@ explodePacks r = go =<< listPackFiles r where go [] = return False go packs = withTmpDir "packs" $ \tmpdir -> do + r' <- addGitEnv r "GIT_OBJECT_DIRECTORY" tmpdir putStrLn "Unpacking all pack files." forM_ packs $ \packfile -> do - moveFile packfile (tmpdir takeFileName packfile) - removeWhenExistsWith R.removeLink - (packIdxFile (toRawFilePath packfile)) - forM_ packs $ \packfile -> do - let tmp = tmpdir takeFileName packfile - allowRead (toRawFilePath tmp) + -- Just in case permissions are messed up. + allowRead (toRawFilePath packfile) -- May fail, if pack file is corrupt. void $ tryIO $ - pipeWrite [Param "unpack-objects", Param "-r"] r $ \h -> - L.hPut h =<< L.readFile tmp + pipeWrite [Param "unpack-objects", Param "-r"] r' $ \h -> + L.hPut h =<< L.readFile packfile + objs <- dirContentsRecursive tmpdir + forM_ objs $ \objfile -> do + f <- relPathDirToFile + (toRawFilePath tmpdir) + (toRawFilePath objfile) + let dest = objectsDir r P. f + createDirectoryIfMissing True + (fromRawFilePath (parentDir dest)) + moveFile objfile (fromRawFilePath dest) + forM_ packs $ \packfile -> do + let f = toRawFilePath packfile + removeWhenExistsWith R.removeLink f + removeWhenExistsWith R.removeLink (packIdxFile f) return True {- Try to retrieve a set of missing objects, from the remotes of a diff --git a/doc/bugs/Git_repos_corrupt_themselves/comment_16_586e591a1e3cc53fd0108da91492c5c2._comment b/doc/bugs/Git_repos_corrupt_themselves/comment_16_586e591a1e3cc53fd0108da91492c5c2._comment new file mode 100644 index 0000000000..3d15799ed7 --- /dev/null +++ b/doc/bugs/Git_repos_corrupt_themselves/comment_16_586e591a1e3cc53fd0108da91492c5c2._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 16""" + date="2021-06-29T16:11:03Z" + content=""" +I have verified that an interrupted repair results in data loss, when +.git/objects contains pack files. + +Fixed that. Which leaves 2 open questions: + +* Was the assistant interrupted somehow while it was running repair, in + Atemu's case? That could have been anything from the laptop being + powered off, to it being stopped explicitly, to it crashing. +* What caused the assistant to think it needed repair in the first place? + +Although the answer to the second question doesn't matter much if the +data loss bug is fixed -- if there's a problem of some kind causing +unncessary repairs, it would only be some excess CPU load. +"""]]