From aa414d97c902fa350fe8c673eb9b931483657845 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 16 May 2022 15:19:48 -0400 Subject: [PATCH] make fsck normalize object locations The purpose of this is to fix situations where the annex object file is stored in a directory structure other than where annex symlinks point to. But it will also move object files from the hashdirmixed back to hashdirlower if the repo configuration makes that the normal location. It would have been more work to avoid that than to let it do it. Sponsored-by: Dartmouth College's Datalad project --- Annex/Content.hs | 9 ++- Annex/Content/Presence.hs | 1 + Annex/Locations.hs | 14 +++-- CHANGELOG | 2 + Command/Fsck.hs | 57 ++++++++++++++++++- ..._34__migrate__34___from_adjusted_mode.mdwn | 2 + 6 files changed, 78 insertions(+), 7 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 5b5b798dca..62471f7305 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -64,6 +64,7 @@ module Annex.Content ( isKeyUnlockedThin, getKeyStatus, getKeyFileStatus, + cleanObjectDirs, ) where import System.IO.Unsafe (unsafeInterleaveIO) @@ -610,6 +611,11 @@ cleanObjectLoc key cleaner = do cleaner cleanObjectDirs file +{- Given a filename inside the object directory, tries to remove the object + - directory, as well as the object hash directories. + - + - Does nothing if the object directory is not empty, and does not + - throw an exception if it's unable to remove a directory. -} cleanObjectDirs :: RawFilePath -> Annex () cleanObjectDirs f = do HashLevels n <- objectHashLevels <$> Annex.getGitConfig @@ -619,7 +625,8 @@ cleanObjectDirs f = do go file n = do let dir = parentDir file maybe noop (const $ go dir (n-1)) - <=< catchMaybeIO $ removeDirectory (fromRawFilePath dir) + <=< catchMaybeIO $ tryWhenExists $ + removeDirectory (fromRawFilePath dir) {- Removes a key's file from .git/annex/objects/ -} removeAnnex :: ContentRemovalLock -> Annex () diff --git a/Annex/Content/Presence.hs b/Annex/Content/Presence.hs index 86fde17e44..ff12c865c2 100644 --- a/Annex/Content/Presence.hs +++ b/Annex/Content/Presence.hs @@ -19,6 +19,7 @@ module Annex.Content.Presence ( isUnmodified', isUnmodifiedCheap, withContentLockFile, + contentLockFile, ) where import Annex.Content.Presence.LowLevel diff --git a/Annex/Locations.hs b/Annex/Locations.hs index 25dd4a5f75..8cac9a9dac 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -16,6 +16,7 @@ module Annex.Locations ( objectDir, objectDir', gitAnnexLocation, + gitAnnexLocation', gitAnnexLocationDepth, gitAnnexLink, gitAnnexLinkCanonical, @@ -172,14 +173,17 @@ gitAnnexLocationDepth config = hashlevels + 1 - be stored. -} gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO RawFilePath -gitAnnexLocation key r config = gitAnnexLocation' key r config +gitAnnexLocation = gitAnnexLocation' R.doesPathExist + +gitAnnexLocation' :: (RawFilePath -> IO Bool) -> Key -> Git.Repo -> GitConfig -> IO RawFilePath +gitAnnexLocation' checker key r config = gitAnnexLocation'' key r config (annexCrippledFileSystem config) (coreSymlinks config) - R.doesPathExist + checker (Git.localGitDir r) -gitAnnexLocation' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (RawFilePath -> IO Bool) -> RawFilePath -> IO RawFilePath -gitAnnexLocation' key r config crippled symlinkssupported checker gitdir +gitAnnexLocation'' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (RawFilePath -> IO Bool) -> RawFilePath -> IO RawFilePath +gitAnnexLocation'' key r config crippled symlinkssupported checker gitdir {- Bare repositories default to hashDirLower for new - content, as it's more portable. But check all locations. -} | Git.repoIsLocalBare r = checkall annexLocationsBare @@ -207,7 +211,7 @@ gitAnnexLink file key r config = do currdir <- R.getCurrentDirectory let absfile = absNormPathUnix currdir file let gitdir = getgitdir currdir - loc <- gitAnnexLocation' key r config False False (\_ -> return True) gitdir + loc <- gitAnnexLocation'' key r config False False (\_ -> return True) gitdir toInternalGitPath <$> relPathDirToFile (parentDir absfile) loc where getgitdir currdir diff --git a/CHANGELOG b/CHANGELOG index 70aeedebba..817dd32687 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -12,6 +12,8 @@ git-annex (10.20220505) UNRELEASED; urgency=medium * Special remotes using exporttree=yes and/or importtree=yes now checksum content while it is being retrieved, instead of in a separate pass at the end. + * fsck: Fix situations where the annex object file is stored in a + directory structure other than where annex symlinks point to. -- Joey Hess Thu, 05 May 2022 15:08:07 -0400 diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 4de157943f..f34640b087 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010-2021 Joey Hess + - Copyright 2010-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -16,9 +16,11 @@ import qualified Remote import qualified Types.Backend import qualified Backend import Annex.Content +import Annex.Content.Presence import Annex.Content.Presence.LowLevel import Annex.Perms import Annex.Link +import Annex.Version import Logs.Location import Logs.Trust import Logs.Activity @@ -134,6 +136,7 @@ perform key file backend numcopies = do check -- order matters [ fixLink key file + , fixObjectLocation key , verifyLocationLog key keystatus ai , verifyRequiredContent key ai , verifyAssociatedFiles key keystatus file @@ -243,6 +246,58 @@ fixLink key file = do addAnnexLink want file | otherwise = noop +{- A repository that supports symlinks and is not bare may have in the past + - been bare, or not supported symlinks. If so, the object may be located + - in a directory other than the one where annex symlinks point to. Moves + - the object in that case. + - + - Also if a repository has been converted to bare, or moved to a crippled + - filesystem not supporting symlinks, the object file will be moved + - to the other location. + -} +fixObjectLocation :: Key -> Annex Bool +fixObjectLocation key = do +#ifdef mingw32_HOST_OS + -- Windows does not allow locked files to be renamed, but annex + -- links are also not used on Windows. + return True +#else + loc <- calcRepo (gitAnnexLocation key) + idealloc <- calcRepo (gitAnnexLocation' (const (pure True)) key) + if loc == idealloc + then return True + else ifM (liftIO $ R.doesPathExist loc) + ( moveobjdir loc idealloc + `catchNonAsync` \_e -> return True + , return True + ) + where + moveobjdir src dest = do + let srcdir = parentDir src + let destdir = parentDir dest + showNote "normalizing object location" + -- When the content file is moved, it will + -- appear to other processes as if it has been removed. + -- That should never happen to a process that has used + -- lockContentShared, so avoid it by locking the content + -- for removal, although it's not really being removed. + lockContentForRemoval key (return True) $ \_lck -> do + -- Thaw the content directory to allow renaming it. + thawContentDir src + createAnnexDirectory (parentDir destdir) + liftIO $ renameDirectory + (fromRawFilePath srcdir) + (fromRawFilePath destdir) + -- Since the directory was moved, lockContentForRemoval + -- will not be able to remove the lock file it + -- made. So, remove the lock file here. + mlockfile <- contentLockFile key =<< getVersion + liftIO $ maybe noop (removeWhenExistsWith R.removeLink) mlockfile + freezeContentDir dest + cleanObjectDirs src + return True +#endif + {- Checks that the location log reflects the current status of the key, - in this repository only. -} verifyLocationLog :: Key -> KeyStatus -> ActionItem -> Annex Bool diff --git a/doc/todo/command_to___34__migrate__34___from_adjusted_mode.mdwn b/doc/todo/command_to___34__migrate__34___from_adjusted_mode.mdwn index d9a5db3987..024e1b2f7e 100644 --- a/doc/todo/command_to___34__migrate__34___from_adjusted_mode.mdwn +++ b/doc/todo/command_to___34__migrate__34___from_adjusted_mode.mdwn @@ -5,3 +5,5 @@ Checking out `master` branch is not sufficient since `.git/annex/objects` uses d [[!tag projects/datalad]] [[!meta title="command to migrate object files from hashdirlower to hashdirmixed"]] + +> [[fixed|done]] --[[Joey]]