From 73df633a6215faa093e4a7524c6d328aa988aed1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 19 Jan 2021 12:57:15 -0400 Subject: [PATCH] omit inode from ContentIdentifier for directory special remote Directory special remotes with importtree=yes now avoid unncessary overhead when inodes of files have changed, as happens whenever a FAT filesystem gets remounted. A few unusual edge cases of modifications won't be detected and imported. I think they're unusual enough not to be a concern. It would be possible to add a config setting that controls whether to compare inodes too, but does not seem worth bothering the user about currently. I chose to continue to use the InodeCache serialization, just with the inode zeroed. This way, if I later change my mind or make it configurable, can parse it back to an InodeCache and operate on it. The overhead of storing a 0 in the content identifier log seems worth it. There is a one-time cost to this change; all directory special remotes with importtree=yes will re-hash all files once, and will update the content identifier logs with zeroed inodes. This commit was sponsored by Brett Eisenberg on Patreon. --- CHANGELOG | 3 +++ Remote/Directory.hs | 21 +++++++++++-------- Utility/InodeCache.hs | 8 +++++-- ..._fb21865b36819a54a23e1ccb7f377fa1._comment | 11 ++++++++++ ...cessary_work_due_to_inode_instability.mdwn | 2 ++ 5 files changed, 34 insertions(+), 11 deletions(-) create mode 100644 doc/bugs/Directory_remotes_with_same_mount_point/comment_5_fb21865b36819a54a23e1ccb7f377fa1._comment diff --git a/CHANGELOG b/CHANGELOG index bcdfb29de2..fbb6904690 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -30,6 +30,9 @@ git-annex (8.20201130) UNRELEASED; urgency=medium Thanks, Grond for the patch. * Avoid crashing when there are remotes using unparseable urls. Including the non-standard URI form that git-remote-gcrypt uses for rsync. + * Directory special remotes with importtree=yes now avoid unncessary + overhead when inodes of files have changed, as happens whenever a FAT + filesystem gets remounted. -- Joey Hess Mon, 04 Jan 2021 12:52:41 -0400 diff --git a/Remote/Directory.hs b/Remote/Directory.hs index ae3b1de4a0..c4f8dcf45d 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -1,6 +1,6 @@ {- A "remote" that is just a filesystem directory. - - - Copyright 2011-2020 Joey Hess + - Copyright 2011-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -354,18 +354,21 @@ listImportableContentsM dir = liftIO $ do sz <- getFileSize' f st return $ Just (mkImportLocation relf, (cid, sz)) --- Make a ContentIdentifier that contains an InodeCache. --- --- The InodeCache is generated without checking a sentinal file. --- So in a case when a remount etc causes all the inodes to change, --- files may appear to be modified when they are not, which will only --- result in extra work to re-import them. --- +-- Make a ContentIdentifier that contains the size and mtime of the file. -- If the file is not a regular file, this will return Nothing. +-- +-- The inode is zeroed because often this is used for import from a +-- FAT filesystem, whose inodes change each time it's mounted, and +-- including inodes would cause repeated re-hashing of files, and +-- bloat the git-annex branch with changes to content identifier logs. +-- +-- This does mean that swaps of two files with the same size and +-- mtime won't be noticed, nor will modifications to files that +-- preserve the size and mtime. Both very unlikely so acceptable. mkContentIdentifier :: RawFilePath -> FileStatus -> IO (Maybe ContentIdentifier) mkContentIdentifier f st = fmap (ContentIdentifier . encodeBS . showInodeCache) - <$> toInodeCache noTSDelta f st + <$> toInodeCache' noTSDelta f st 0 guardSameContentIdentifiers :: a -> ContentIdentifier -> Maybe ContentIdentifier -> a guardSameContentIdentifiers cont old new diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs index 74c6dffb49..9a21c632ad 100644 --- a/Utility/InodeCache.hs +++ b/Utility/InodeCache.hs @@ -24,6 +24,7 @@ module Utility.InodeCache ( showInodeCache, genInodeCache, toInodeCache, + toInodeCache', InodeCacheKey, inodeCacheToKey, @@ -189,7 +190,10 @@ genInodeCache f delta = catchDefaultIO Nothing $ toInodeCache delta f =<< R.getFileStatus f toInodeCache :: TSDelta -> RawFilePath -> FileStatus -> IO (Maybe InodeCache) -toInodeCache (TSDelta getdelta) f s +toInodeCache d f s = toInodeCache' d f s (fileID s) + +toInodeCache' :: TSDelta -> RawFilePath -> FileStatus -> FileID -> IO (Maybe InodeCache) +toInodeCache' (TSDelta getdelta) f s inode | isRegularFile s = do delta <- getdelta sz <- getFileSize' f s @@ -198,7 +202,7 @@ toInodeCache (TSDelta getdelta) f s #else let mtime = modificationTimeHiRes s #endif - return $ Just $ InodeCache $ InodeCachePrim (fileID s) sz (MTimeHighRes (mtime + highResTime delta)) + return $ Just $ InodeCache $ InodeCachePrim inode sz (MTimeHighRes (mtime + highResTime delta)) | otherwise = pure Nothing {- Some filesystem get new random inodes each time they are mounted. diff --git a/doc/bugs/Directory_remotes_with_same_mount_point/comment_5_fb21865b36819a54a23e1ccb7f377fa1._comment b/doc/bugs/Directory_remotes_with_same_mount_point/comment_5_fb21865b36819a54a23e1ccb7f377fa1._comment new file mode 100644 index 0000000000..d9e34d4eb8 --- /dev/null +++ b/doc/bugs/Directory_remotes_with_same_mount_point/comment_5_fb21865b36819a54a23e1ccb7f377fa1._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2021-01-19T16:53:54Z" + content=""" +Well, the changes I made for that todo make changes to inodes due to +remounting in the middle of an import not cause behavior like this. Of +course I don't know that's what caused this behavior, but it does seem +likely those changes would turn out to have fixed this, if we understood +how to reproduce the problem. +"""]] diff --git a/doc/todo/import_tree_from_FAT_does_unncessary_work_due_to_inode_instability.mdwn b/doc/todo/import_tree_from_FAT_does_unncessary_work_due_to_inode_instability.mdwn index d63b3e0c5b..17993af5da 100644 --- a/doc/todo/import_tree_from_FAT_does_unncessary_work_due_to_inode_instability.mdwn +++ b/doc/todo/import_tree_from_FAT_does_unncessary_work_due_to_inode_instability.mdwn @@ -26,3 +26,5 @@ an inode sentinal file and checking it to tell when inodes have changed would need importing to write to the drive. That seems strange, and the drive could even be read-only. May be the directory special remote should just not use inode numbers at all? + +> [[done]] --[[Joey]]