make fsck normalize object locations

The purpose of this is to fix situations where the annex object file is
stored in a directory structure other than where annex symlinks point to.

But it will also move object files from the hashdirmixed back to
hashdirlower if the repo configuration makes that the normal location.
It would have been more work to avoid that than to let it do it.

Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
Joey Hess 2022-05-16 15:19:48 -04:00
parent 6b5029db29
commit aa414d97c9
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 78 additions and 7 deletions

View file

@ -64,6 +64,7 @@ module Annex.Content (
isKeyUnlockedThin,
getKeyStatus,
getKeyFileStatus,
cleanObjectDirs,
) where
import System.IO.Unsafe (unsafeInterleaveIO)
@ -610,6 +611,11 @@ cleanObjectLoc key cleaner = do
cleaner
cleanObjectDirs file
{- Given a filename inside the object directory, tries to remove the object
- directory, as well as the object hash directories.
-
- Does nothing if the object directory is not empty, and does not
- throw an exception if it's unable to remove a directory. -}
cleanObjectDirs :: RawFilePath -> Annex ()
cleanObjectDirs f = do
HashLevels n <- objectHashLevels <$> Annex.getGitConfig
@ -619,7 +625,8 @@ cleanObjectDirs f = do
go file n = do
let dir = parentDir file
maybe noop (const $ go dir (n-1))
<=< catchMaybeIO $ removeDirectory (fromRawFilePath dir)
<=< catchMaybeIO $ tryWhenExists $
removeDirectory (fromRawFilePath dir)
{- Removes a key's file from .git/annex/objects/ -}
removeAnnex :: ContentRemovalLock -> Annex ()

View file

@ -19,6 +19,7 @@ module Annex.Content.Presence (
isUnmodified',
isUnmodifiedCheap,
withContentLockFile,
contentLockFile,
) where
import Annex.Content.Presence.LowLevel

View file

@ -16,6 +16,7 @@ module Annex.Locations (
objectDir,
objectDir',
gitAnnexLocation,
gitAnnexLocation',
gitAnnexLocationDepth,
gitAnnexLink,
gitAnnexLinkCanonical,
@ -172,14 +173,17 @@ gitAnnexLocationDepth config = hashlevels + 1
- be stored.
-}
gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLocation key r config = gitAnnexLocation' key r config
gitAnnexLocation = gitAnnexLocation' R.doesPathExist
gitAnnexLocation' :: (RawFilePath -> IO Bool) -> Key -> Git.Repo -> GitConfig -> IO RawFilePath
gitAnnexLocation' checker key r config = gitAnnexLocation'' key r config
(annexCrippledFileSystem config)
(coreSymlinks config)
R.doesPathExist
checker
(Git.localGitDir r)
gitAnnexLocation' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (RawFilePath -> IO Bool) -> RawFilePath -> IO RawFilePath
gitAnnexLocation' key r config crippled symlinkssupported checker gitdir
gitAnnexLocation'' :: Key -> Git.Repo -> GitConfig -> Bool -> Bool -> (RawFilePath -> IO Bool) -> RawFilePath -> IO RawFilePath
gitAnnexLocation'' key r config crippled symlinkssupported checker gitdir
{- Bare repositories default to hashDirLower for new
- content, as it's more portable. But check all locations. -}
| Git.repoIsLocalBare r = checkall annexLocationsBare
@ -207,7 +211,7 @@ gitAnnexLink file key r config = do
currdir <- R.getCurrentDirectory
let absfile = absNormPathUnix currdir file
let gitdir = getgitdir currdir
loc <- gitAnnexLocation' key r config False False (\_ -> return True) gitdir
loc <- gitAnnexLocation'' key r config False False (\_ -> return True) gitdir
toInternalGitPath <$> relPathDirToFile (parentDir absfile) loc
where
getgitdir currdir

View file

@ -12,6 +12,8 @@ git-annex (10.20220505) UNRELEASED; urgency=medium
* Special remotes using exporttree=yes and/or importtree=yes now
checksum content while it is being retrieved, instead of in a separate
pass at the end.
* fsck: Fix situations where the annex object file is stored in a
directory structure other than where annex symlinks point to.
-- Joey Hess <id@joeyh.name> Thu, 05 May 2022 15:08:07 -0400

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -16,9 +16,11 @@ import qualified Remote
import qualified Types.Backend
import qualified Backend
import Annex.Content
import Annex.Content.Presence
import Annex.Content.Presence.LowLevel
import Annex.Perms
import Annex.Link
import Annex.Version
import Logs.Location
import Logs.Trust
import Logs.Activity
@ -134,6 +136,7 @@ perform key file backend numcopies = do
check
-- order matters
[ fixLink key file
, fixObjectLocation key
, verifyLocationLog key keystatus ai
, verifyRequiredContent key ai
, verifyAssociatedFiles key keystatus file
@ -243,6 +246,58 @@ fixLink key file = do
addAnnexLink want file
| otherwise = noop
{- A repository that supports symlinks and is not bare may have in the past
- been bare, or not supported symlinks. If so, the object may be located
- in a directory other than the one where annex symlinks point to. Moves
- the object in that case.
-
- Also if a repository has been converted to bare, or moved to a crippled
- filesystem not supporting symlinks, the object file will be moved
- to the other location.
-}
fixObjectLocation :: Key -> Annex Bool
fixObjectLocation key = do
#ifdef mingw32_HOST_OS
-- Windows does not allow locked files to be renamed, but annex
-- links are also not used on Windows.
return True
#else
loc <- calcRepo (gitAnnexLocation key)
idealloc <- calcRepo (gitAnnexLocation' (const (pure True)) key)
if loc == idealloc
then return True
else ifM (liftIO $ R.doesPathExist loc)
( moveobjdir loc idealloc
`catchNonAsync` \_e -> return True
, return True
)
where
moveobjdir src dest = do
let srcdir = parentDir src
let destdir = parentDir dest
showNote "normalizing object location"
-- When the content file is moved, it will
-- appear to other processes as if it has been removed.
-- That should never happen to a process that has used
-- lockContentShared, so avoid it by locking the content
-- for removal, although it's not really being removed.
lockContentForRemoval key (return True) $ \_lck -> do
-- Thaw the content directory to allow renaming it.
thawContentDir src
createAnnexDirectory (parentDir destdir)
liftIO $ renameDirectory
(fromRawFilePath srcdir)
(fromRawFilePath destdir)
-- Since the directory was moved, lockContentForRemoval
-- will not be able to remove the lock file it
-- made. So, remove the lock file here.
mlockfile <- contentLockFile key =<< getVersion
liftIO $ maybe noop (removeWhenExistsWith R.removeLink) mlockfile
freezeContentDir dest
cleanObjectDirs src
return True
#endif
{- Checks that the location log reflects the current status of the key,
- in this repository only. -}
verifyLocationLog :: Key -> KeyStatus -> ActionItem -> Annex Bool

View file

@ -5,3 +5,5 @@ Checking out `master` branch is not sufficient since `.git/annex/objects` uses d
[[!tag projects/datalad]]
[[!meta title="command to migrate object files from hashdirlower to hashdirmixed"]]
> [[fixed|done]] --[[Joey]]