git-annex/Utility/InodeCache.hs

312 lines
9.7 KiB
Haskell
Raw Normal View History

2014-06-11 20:17:01 +00:00
{- Caching a file's inode, size, and modification time
- to see when it's changed.
2013-02-14 20:17:40 +00:00
-
2019-10-30 19:16:03 +00:00
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
2013-02-14 20:17:40 +00:00
-
- License: BSD-2-clause
2013-02-14 20:17:40 +00:00
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
2014-06-11 20:17:01 +00:00
module Utility.InodeCache (
InodeCache,
mkInodeCache,
2014-06-11 20:17:01 +00:00
InodeComparisonType(..),
inodeCacheFileSize,
2014-06-11 20:17:01 +00:00
compareStrong,
compareWeak,
compareBy,
2014-06-11 20:17:01 +00:00
readInodeCache,
showInodeCache,
genInodeCache,
toInodeCache,
toInodeCache',
2014-06-11 20:17:01 +00:00
InodeCacheKey,
inodeCacheToKey,
2019-10-30 19:16:03 +00:00
inodeCacheToFileSize,
2014-06-11 20:17:01 +00:00
inodeCacheToMtime,
2019-10-30 19:16:03 +00:00
inodeCacheToEpochTime,
inodeCacheEpochTimeRange,
SentinalFile(..),
SentinalStatus(..),
TSDelta,
noTSDelta,
writeSentinalFile,
checkSentinalFile,
sentinalFileExists,
2014-06-11 20:17:01 +00:00
prop_read_show_inodecache
) where
2013-02-14 20:17:40 +00:00
import Common
import Utility.TimeStamp
import Utility.QuickCheck
import qualified Utility.RawFilePath as R
import System.PosixCompat.Types
import Data.Time.Clock.POSIX
2013-02-14 20:17:40 +00:00
#ifdef mingw32_HOST_OS
import Data.Word (Word64)
#else
import System.Posix.Files
#endif
data InodeCachePrim = InodeCachePrim FileID FileSize MTime
deriving (Show, Eq, Ord)
newtype InodeCache = InodeCache InodeCachePrim
deriving (Show)
mkInodeCache :: FileID -> FileSize -> POSIXTime -> InodeCache
mkInodeCache inode sz mtime = InodeCache $
InodeCachePrim inode sz (MTimeHighRes mtime)
inodeCacheFileSize :: InodeCache -> FileSize
inodeCacheFileSize (InodeCache (InodeCachePrim _ sz _)) = sz
{- Inode caches can be compared in two different ways, either weakly
- or strongly. -}
data InodeComparisonType = Weakly | Strongly
automatic conflict resolution for v6 unlocked files Several tricky parts: * When the conflict is just between the same key being locked and unlocked, the unlocked version wins, and the file is not renamed in this case. * Need to update associated file map when conflict resolution renames an unlocked file. * git merge runs the smudge filter on the conflicting file, and actually overwrites the file with the same content it had before, and so invalidates its inode cache. This makes it difficult to know when it's safe to remove such files as conflict cruft, without going so far as to compare their entire contents. Dealt with this by preventing the smudge filter from populating the file when a merge is run. However, that also prevents the smudge filter being run for non-conflicting files, so eg moving a file won't put its new content into place. * Ideally, if a merge or a merge conflict resolution renames an unlocked file, the file in the work tree can just be moved, rather than copying the content to a new worktree file. This is attempted to be done in merge conflict resolution, but due to git merge's behavior of running smudge filters, what actually seems to happen is the old worktree file with the content is deleted and rewritten as a pointer file, so doesn't get reused. So, this is probably not as efficient as it optimally could be. If that becomes a problem, could look into running the merge in a separate worktree and updating the real worktree more efficiently, similarly to the direct mode merge. However, the direct mode merge had a lot of bugs, and I'd rather not use that more error-prone method unless really needed.
2015-12-29 19:41:09 +00:00
deriving (Eq, Ord, Show)
{- Strong comparison, including inodes. -}
compareStrong :: InodeCache -> InodeCache -> Bool
compareStrong (InodeCache x) (InodeCache y) = x == y
2013-02-14 20:17:40 +00:00
{- Weak comparison of the inode caches, comparing the size and mtime,
- but not the actual inode. Useful when inodes have changed, perhaps
- due to some filesystems being remounted.
-
- The weak mtime comparison treats any mtimes that are within 2 seconds
add small delay to avoid problems on systems with low-resolution mtime I've seen intermittent failures of the test suite with v6 for a long time, it seems to have possibly gotten worse with the changes around v7. Or just being unlucky; all tests failed today. Seen on amd64 and i386 builders, repeatedly but intermittently: unused: FAIL (4.86s) Test.hs:928: git diff did not show changes to unlocked file And I think other such failures, all involving v7/v6 mode tests. I managed to reproduce the unused failure with --keep-failures, and inside the repo, git diff was indeed not showing any changes for the modified unlocked file. The two stats will be the same other than mtime; the old and new files have the same size and inode, since the test case writes to the file and then overwrites it. Indeed, notice the identical timestamps: builder@orca:~/gitbuilder/build/.t/tmprepo335$ echo 1 > foo; stat foo; echo 2 > foo; stat foo File: foo Size: 2 Blocks: 8 IO Block: 4096 regular file Device: 801h/2049d Inode: 3546179 Links: 1 Access: (0644/-rw-r--r--) Uid: ( 1000/ builder) Gid: ( 1000/ builder) Access: 2018-10-29 22:14:10.894942036 +0000 Modify: 2018-10-29 22:14:10.894942036 +0000 Change: 2018-10-29 22:14:10.894942036 +0000 Birth: - File: foo Size: 2 Blocks: 8 IO Block: 4096 regular file Device: 801h/2049d Inode: 3546179 Links: 1 Access: (0644/-rw-r--r--) Uid: ( 1000/ builder) Gid: ( 1000/ builder) Access: 2018-10-29 22:14:10.894942036 +0000 Modify: 2018-10-29 22:14:10.898942036 +0000 Change: 2018-10-29 22:14:10.898942036 +0000 Birth: - I'm seeing this in Linux VMs; it doesn't happen on my laptop. I've also not experienced the intermittent test suite failures on my laptop. So, I hope that this small delay will avoid the problem. Update: I didn't, indeed I then reproduced the same failure on my laptop, so it must be due to something else. But keeping this change anyway since not needing to worry about lowish-resolution mtime in the test suite seems worthwhile.
2018-10-29 22:42:20 +00:00
- of one-another as the same. This is because FAT has only a 2 second
- resolution. When a FAT filesystem is used on Linux, higher resolution
2019-10-23 16:32:46 +00:00
- timestamps maybe are cached and used by Linux, but they are lost
- on unmount, so after a remount, the timestamp can appear to have changed.
-}
compareWeak :: InodeCache -> InodeCache -> Bool
compareWeak (InodeCache (InodeCachePrim _ size1 mtime1)) (InodeCache (InodeCachePrim _ size2 mtime2)) =
size1 == size2 && (abs (lowResTime mtime1 - lowResTime mtime2) < 2)
compareBy :: InodeComparisonType -> InodeCache -> InodeCache -> Bool
compareBy Strongly = compareStrong
compareBy Weakly = compareWeak
{- For use in a Map; it's determined at creation time whether this
- uses strong or weak comparison for Eq. -}
data InodeCacheKey = InodeCacheKey InodeComparisonType InodeCachePrim
automatic conflict resolution for v6 unlocked files Several tricky parts: * When the conflict is just between the same key being locked and unlocked, the unlocked version wins, and the file is not renamed in this case. * Need to update associated file map when conflict resolution renames an unlocked file. * git merge runs the smudge filter on the conflicting file, and actually overwrites the file with the same content it had before, and so invalidates its inode cache. This makes it difficult to know when it's safe to remove such files as conflict cruft, without going so far as to compare their entire contents. Dealt with this by preventing the smudge filter from populating the file when a merge is run. However, that also prevents the smudge filter being run for non-conflicting files, so eg moving a file won't put its new content into place. * Ideally, if a merge or a merge conflict resolution renames an unlocked file, the file in the work tree can just be moved, rather than copying the content to a new worktree file. This is attempted to be done in merge conflict resolution, but due to git merge's behavior of running smudge filters, what actually seems to happen is the old worktree file with the content is deleted and rewritten as a pointer file, so doesn't get reused. So, this is probably not as efficient as it optimally could be. If that becomes a problem, could look into running the merge in a separate worktree and updating the real worktree more efficiently, similarly to the direct mode merge. However, the direct mode merge had a lot of bugs, and I'd rather not use that more error-prone method unless really needed.
2015-12-29 19:41:09 +00:00
deriving (Ord, Show)
instance Eq InodeCacheKey where
(InodeCacheKey ctx x) == (InodeCacheKey cty y) =
compareBy (maximum [ctx,cty]) (InodeCache x ) (InodeCache y)
inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey
inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim
2019-10-30 19:16:03 +00:00
inodeCacheToFileSize :: InodeCache -> FileSize
inodeCacheToFileSize (InodeCache (InodeCachePrim _ sz _)) = sz
inodeCacheToMtime :: InodeCache -> POSIXTime
inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = highResTime mtime
2019-10-30 19:16:03 +00:00
inodeCacheToEpochTime :: InodeCache -> EpochTime
inodeCacheToEpochTime (InodeCache (InodeCachePrim _ _ mtime)) = lowResTime mtime
-- Returns min, max EpochTime that weakly match the time of the InodeCache.
inodeCacheEpochTimeRange :: InodeCache -> (EpochTime, EpochTime)
inodeCacheEpochTimeRange i =
let t = inodeCacheToEpochTime i
in (t-1, t+1)
{- For backwards compatability, support low-res mtime with no
- fractional seconds. -}
data MTime = MTimeLowRes EpochTime | MTimeHighRes POSIXTime
deriving (Show, Ord)
{- A low-res time compares equal to any high-res time in the same second. -}
instance Eq MTime where
MTimeLowRes a == MTimeLowRes b = a == b
MTimeHighRes a == MTimeHighRes b = a == b
MTimeHighRes a == MTimeLowRes b = lowResTime a == b
MTimeLowRes a == MTimeHighRes b = a == lowResTime b
class MultiResTime t where
lowResTime :: t -> EpochTime
highResTime :: t -> POSIXTime
instance MultiResTime EpochTime where
lowResTime = id
highResTime = realToFrac
instance MultiResTime POSIXTime where
lowResTime = fromInteger . floor
highResTime = id
instance MultiResTime MTime where
lowResTime (MTimeLowRes t) = t
lowResTime (MTimeHighRes t) = lowResTime t
highResTime (MTimeLowRes t) = highResTime t
highResTime (MTimeHighRes t) = t
2013-02-14 20:17:40 +00:00
showInodeCache :: InodeCache -> String
showInodeCache (InodeCache (InodeCachePrim inode size (MTimeHighRes mtime))) =
let (t, d) = separate (== '.') (takeWhile (/= 's') (show mtime))
in unwords
[ show inode
, show size
, t
, d
]
showInodeCache (InodeCache (InodeCachePrim inode size (MTimeLowRes mtime))) =
unwords
[ show inode
, show size
, show mtime
]
2013-02-14 20:17:40 +00:00
readInodeCache :: String -> Maybe InodeCache
readInodeCache s = case words s of
(inode:size:mtime:[]) -> do
i <- readish inode
sz <- readish size
t <- readish mtime
return $ InodeCache $ InodeCachePrim i sz (MTimeLowRes t)
(inode:size:mtime:mtimedecimal:_) -> do
i <- readish inode
sz <- readish size
t <- parsePOSIXTime $ mtime ++ '.' : mtimedecimal
return $ InodeCache $ InodeCachePrim i sz (MTimeHighRes t)
2013-02-14 20:17:40 +00:00
_ -> Nothing
genInodeCache :: RawFilePath -> TSDelta -> IO (Maybe InodeCache)
genInodeCache f delta = catchDefaultIO Nothing $
toInodeCache delta f =<< R.getFileStatus f
toInodeCache :: TSDelta -> RawFilePath -> FileStatus -> IO (Maybe InodeCache)
toInodeCache d f s = toInodeCache' d f s (fileID s)
toInodeCache' :: TSDelta -> RawFilePath -> FileStatus -> FileID -> IO (Maybe InodeCache)
toInodeCache' (TSDelta getdelta) f s inode
| isRegularFile s = do
delta <- getdelta
sz <- getFileSize' f s
#ifdef mingw32_HOST_OS
mtime <- utcTimeToPOSIXSeconds <$> getModificationTime (fromRawFilePath f)
#else
let mtime = modificationTimeHiRes s
#endif
return $ Just $ InodeCache $ InodeCachePrim inode sz (MTimeHighRes (mtime + highResTime delta))
| otherwise = pure Nothing
{- Some filesystem get new random inodes each time they are mounted.
- To detect this and other problems, a sentinal file can be created.
- Its InodeCache at the time of its creation is written to the cache file,
- so changes can later be detected. -}
data SentinalFile = SentinalFile
{ sentinalFile :: RawFilePath
, sentinalCacheFile :: RawFilePath
}
deriving (Show)
{- On Windows, the mtime of a file appears to change when the time zone is
- changed. To deal with this, a TSDelta can be used; the delta is added to
- the mtime when generating an InodeCache. The current delta can be found
- by looking at the SentinalFile. Effectively, this makes all InodeCaches
- use the same time zone that was in use when the sential file was
- originally written. -}
newtype TSDelta = TSDelta (IO EpochTime)
noTSDelta :: TSDelta
noTSDelta = TSDelta (pure 0)
writeSentinalFile :: SentinalFile -> IO ()
writeSentinalFile s = do
writeFile (fromRawFilePath (sentinalFile s)) ""
maybe noop (writeFile (fromRawFilePath (sentinalCacheFile s)) . showInodeCache)
=<< genInodeCache (sentinalFile s) noTSDelta
data SentinalStatus = SentinalStatus
{ sentinalInodesChanged :: Bool
, sentinalTSDelta :: TSDelta
}
{- Checks if the InodeCache of the sentinal file is the same
- as it was when it was originally created.
-
- On Windows, time stamp differences are ignored, since they change
- with the timezone.
-
- When the sential file does not exist, InodeCaches canot reliably be
- compared, so the assumption is that there is has been a change.
-}
checkSentinalFile :: SentinalFile -> IO SentinalStatus
checkSentinalFile s = do
mold <- loadoldcache
case mold of
Nothing -> return dummy
(Just old) -> do
mnew <- gennewcache
case mnew of
Nothing -> return dummy
Just new -> return $ calc old new
where
loadoldcache = catchDefaultIO Nothing $
readInodeCache <$> readFile (fromRawFilePath (sentinalCacheFile s))
gennewcache = genInodeCache (sentinalFile s) noTSDelta
calc (InodeCache (InodeCachePrim oldinode oldsize oldmtime)) (InodeCache (InodeCachePrim newinode newsize newmtime)) =
SentinalStatus (not unchanged) tsdelta
where
#ifdef mingw32_HOST_OS
-- Since mtime can appear to change when the time zone is
-- changed in windows, we cannot look at the mtime for the
-- sentinal file.
unchanged = oldinode == newinode && oldsize == newsize && (newmtime == newmtime)
tsdelta = TSDelta $ do
-- Run when generating an InodeCache,
-- to get the current delta.
mnew <- gennewcache
return $ case mnew of
Just (InodeCache (InodeCachePrim _ _ currmtime)) ->
lowResTime oldmtime - lowResTime currmtime
Nothing -> 0
#else
unchanged = oldinode == newinode && oldsize == newsize && oldmtime == newmtime
tsdelta = noTSDelta
#endif
dummy = SentinalStatus True noTSDelta
sentinalFileExists :: SentinalFile -> IO Bool
sentinalFileExists s = allM R.doesPathExist [sentinalCacheFile s, sentinalFile s]
instance Arbitrary InodeCache where
arbitrary =
let prim = InodeCachePrim
<$> arbitrary
<*> arbitrary
<*> arbitrary
in InodeCache <$> prim
instance Arbitrary MTime where
arbitrary = frequency
-- timestamp is not usually negative
[ (50, MTimeLowRes <$> (abs . fromInteger <$> arbitrary))
, (50, MTimeHighRes <$> arbitrary)
]
#ifdef mingw32_HOST_OS
instance Arbitrary FileID where
arbitrary = fromIntegral <$> (arbitrary :: Gen Word64)
#endif
prop_read_show_inodecache :: InodeCache -> Bool
prop_read_show_inodecache c = case readInodeCache (showInodeCache c) of
Nothing -> False
Just c' -> compareStrong c c'