high-res mtimes

Cache high-resolution mtimes for improved detection of modified files in v7
(and direct mode).

Including on Windows.

With back-compat support so old low-res mtimes won't break anything, and
so the new information also won't break old versions of git-annex.
This commit is contained in:
Joey Hess 2018-10-30 00:40:17 -04:00
parent 48af284872
commit 5ab0f48ffb
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 109 additions and 31 deletions

View file

@ -766,6 +766,10 @@ isUnmodified key f = go =<< geti
{- Cheap check if a file contains the unmodified content of the key, {- Cheap check if a file contains the unmodified content of the key,
- only checking the InodeCache of the key. - only checking the InodeCache of the key.
-
- Note that, on systems not supporting high-resolution mtimes,
- this may report a false positive when repeated edits are made to a file
- within a small time window (eg 1 second).
-} -}
isUnmodifiedCheap :: Key -> FilePath -> Annex Bool isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key) isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)

View file

@ -53,6 +53,8 @@ git-annex (7.20181025) UNRELEASED; urgency=medium
(Reversion introduced in version 6.20180112) (Reversion introduced in version 6.20180112)
* migrate: Fix failure to migrate from URL keys. * migrate: Fix failure to migrate from URL keys.
(Reversion introduced in version 6.20180926) (Reversion introduced in version 6.20180926)
* Cache high-resolution mtimes for improved detection of modified files
in v7 (and direct mode).
-- Joey Hess <id@joeyh.name> Sat, 13 Oct 2018 00:52:02 -0400 -- Joey Hess <id@joeyh.name> Sat, 13 Oct 2018 00:52:02 -0400

View file

@ -18,8 +18,10 @@ import Annex.Content
import Annex.Perms import Annex.Perms
import qualified Annex.Queue import qualified Annex.Queue
import qualified Database.Keys import qualified Database.Keys
#if ! defined(mingw32_HOST_OS) #if ! defined(mingw32_HOST_OS)
import Utility.Touch import Utility.Touch
import System.Posix.Files
#endif #endif
cmd :: Command cmd :: Command
@ -93,7 +95,7 @@ fixSymlink file link = do
liftIO $ do liftIO $ do
#if ! defined(mingw32_HOST_OS) #if ! defined(mingw32_HOST_OS)
-- preserve mtime of symlink -- preserve mtime of symlink
mtime <- catchMaybeIO $ modificationTimeHighRes mtime <- catchMaybeIO $ modificationTimeHiRes
<$> getSymbolicLinkStatus file <$> getSymbolicLinkStatus file
#endif #endif
createDirectoryIfMissing True (parentDir file) createDirectoryIfMissing True (parentDir file)

View file

@ -96,8 +96,8 @@ clean file = do
then B.length b `seq` return () then B.length b `seq` return ()
else liftIO $ hClose stdin else liftIO $ hClose stdin
-- Optimization when the file is already annexed -- Optimization for the case when the file is already
-- and is unmodified. -- annexed and is unmodified.
case oldkey of case oldkey of
Nothing -> doingest oldkey Nothing -> doingest oldkey
Just ko -> ifM (isUnmodifiedCheap ko file) Just ko -> ifM (isUnmodifiedCheap ko file)

View file

@ -1,12 +1,13 @@
{- Caching a file's inode, size, and modification time {- Caching a file's inode, size, and modification time
- to see when it's changed. - to see when it's changed.
- -
- Copyright 2013-2014 Joey Hess <id@joeyh.name> - Copyright 2013-2018 Joey Hess <id@joeyh.name>
- -
- License: BSD-2-clause - License: BSD-2-clause
-} -}
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
module Utility.InodeCache ( module Utility.InodeCache (
@ -38,15 +39,20 @@ module Utility.InodeCache (
) where ) where
import Common import Common
import Utility.TimeStamp
import Utility.QuickCheck
import System.PosixCompat.Types import System.PosixCompat.Types
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
import Utility.QuickCheck
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
import Data.Word (Word64) import Data.Word (Word64)
import System.Directory
#else
import System.Posix.Files
#endif #endif
data InodeCachePrim = InodeCachePrim FileID Integer EpochTime data InodeCachePrim = InodeCachePrim FileID FileSize MTime
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord)
newtype InodeCache = InodeCache InodeCachePrim newtype InodeCache = InodeCache InodeCachePrim
@ -68,12 +74,12 @@ compareStrong (InodeCache x) (InodeCache y) = x == y
- The weak mtime comparison treats any mtimes that are within 2 seconds - The weak mtime comparison treats any mtimes that are within 2 seconds
- of one-another as the same. This is because FAT has only a 2 second - of one-another as the same. This is because FAT has only a 2 second
- resolution. When a FAT filesystem is used on Linux, higher resolution - resolution. When a FAT filesystem is used on Linux, higher resolution
- timestamps are cached and used by Linux, but this is lost on unmount, - timestamps maybe be are cached and used by Linux, but they are lost
- so after a remount, the timestamp can appear to have changed. - on unmount, so after a remount, the timestamp can appear to have changed.
-} -}
compareWeak :: InodeCache -> InodeCache -> Bool compareWeak :: InodeCache -> InodeCache -> Bool
compareWeak (InodeCache (InodeCachePrim _ size1 mtime1)) (InodeCache (InodeCachePrim _ size2 mtime2)) = compareWeak (InodeCache (InodeCachePrim _ size1 mtime1)) (InodeCache (InodeCachePrim _ size2 mtime2)) =
size1 == size2 && (abs (mtime1 - mtime2) < 2) size1 == size2 && (abs (lowResTime mtime1 - lowResTime mtime2) < 2)
compareBy :: InodeComparisonType -> InodeCache -> InodeCache -> Bool compareBy :: InodeComparisonType -> InodeCache -> InodeCache -> Bool
compareBy Strongly = compareStrong compareBy Strongly = compareStrong
@ -92,23 +98,66 @@ inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey
inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim
inodeCacheToMtime :: InodeCache -> POSIXTime inodeCacheToMtime :: InodeCache -> POSIXTime
inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = realToFrac mtime inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = highResTime mtime
{- For backwards compatability, support low-res mtime with no
- fractional seconds. -}
data MTime = MTimeLowRes EpochTime | MTimeHighRes POSIXTime
deriving (Show, Ord)
{- A low-res time compares equal to any high-res time in the same second. -}
instance Eq MTime where
MTimeLowRes a == MTimeLowRes b = a == b
MTimeHighRes a == MTimeHighRes b = a == b
MTimeHighRes a == MTimeLowRes b = lowResTime a == b
MTimeLowRes a == MTimeHighRes b = a == lowResTime b
class MultiResTime t where
lowResTime :: t -> EpochTime
highResTime :: t -> POSIXTime
instance MultiResTime EpochTime where
lowResTime = id
highResTime = realToFrac
instance MultiResTime POSIXTime where
lowResTime = fromInteger . floor
highResTime = id
instance MultiResTime MTime where
lowResTime (MTimeLowRes t) = t
lowResTime (MTimeHighRes t) = lowResTime t
highResTime (MTimeLowRes t) = highResTime t
highResTime (MTimeHighRes t) = t
showInodeCache :: InodeCache -> String showInodeCache :: InodeCache -> String
showInodeCache (InodeCache (InodeCachePrim inode size mtime)) = unwords showInodeCache (InodeCache (InodeCachePrim inode size (MTimeHighRes mtime))) =
[ show inode let (t, d) = separate (== '.') (takeWhile (/= 's') (show mtime))
, show size in unwords
, show mtime [ show inode
] , show size
, t
, d
]
showInodeCache (InodeCache (InodeCachePrim inode size (MTimeLowRes mtime))) =
unwords
[ show inode
, show size
, show mtime
]
readInodeCache :: String -> Maybe InodeCache readInodeCache :: String -> Maybe InodeCache
readInodeCache s = case words s of readInodeCache s = case words s of
(inode:size:mtime:_) -> (inode:size:mtime:[]) -> do
let prim = InodeCachePrim i <- readish inode
<$> readish inode sz <- readish size
<*> readish size t <- readish mtime
<*> readish mtime return $ InodeCache $ InodeCachePrim i sz (MTimeLowRes t)
in InodeCache <$> prim (inode:size:mtime:mtimedecimal:_) -> do
i <- readish inode
sz <- readish size
t <- parsePOSIXTime' mtime mtimedecimal
return $ InodeCache $ InodeCachePrim i sz (MTimeHighRes t)
_ -> Nothing _ -> Nothing
genInodeCache :: FilePath -> TSDelta -> IO (Maybe InodeCache) genInodeCache :: FilePath -> TSDelta -> IO (Maybe InodeCache)
@ -120,10 +169,12 @@ toInodeCache (TSDelta getdelta) f s
| isRegularFile s = do | isRegularFile s = do
delta <- getdelta delta <- getdelta
sz <- getFileSize' f s sz <- getFileSize' f s
return $ Just $ InodeCache $ InodeCachePrim #ifdef mingw32_HOST_OS
(fileID s) mtime <- MTimeHighRes . utcTimeToPOSIXSeconds <$> getModificationTime f
sz #else
(modificationTime s + delta) let mtime = (MTimeHighRes (modificationTimeHiRes s + highResTime delta))
#endif
return $ Just $ InodeCache $ InodeCachePrim (fileID s) sz mtime
| otherwise = pure Nothing | otherwise = pure Nothing
{- Some filesystem get new random inodes each time they are mounted. {- Some filesystem get new random inodes each time they are mounted.
@ -195,7 +246,7 @@ checkSentinalFile s = do
mnew <- gennewcache mnew <- gennewcache
return $ case mnew of return $ case mnew of
Just (InodeCache (InodeCachePrim _ _ currmtime)) -> Just (InodeCache (InodeCachePrim _ _ currmtime)) ->
oldmtime - currmtime lowResTime oldmtime - lowResTime currmtime
Nothing -> 0 Nothing -> 0
#else #else
unchanged = oldinode == newinode && oldsize == newsize && oldmtime == newmtime unchanged = oldinode == newinode && oldsize == newsize && oldmtime == newmtime
@ -211,10 +262,24 @@ instance Arbitrary InodeCache where
let prim = InodeCachePrim let prim = InodeCachePrim
<$> arbitrary <$> arbitrary
<*> arbitrary <*> arbitrary
-- timestamp cannot be negative <*> arbitrary
<*> (abs . fromInteger <$> arbitrary)
in InodeCache <$> prim in InodeCache <$> prim
instance Arbitrary MTime where
arbitrary = frequency
-- timestamp is not usually negative
[ (50, MTimeLowRes <$> (abs . fromInteger <$> arbitrary))
, (50, MTimeHighRes <$> (abs <$> arbposixtime))
]
where
-- include fractional part, which the usual instance does not
arbposixtime = do
t <- arbitrary
f <- arbitrary
return $ if f == 0
then t
else t + recip f
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
instance Arbitrary FileID where instance Arbitrary FileID where
arbitrary = fromIntegral <$> (arbitrary :: Gen Word64) arbitrary = fromIntegral <$> (arbitrary :: Gen Word64)

View file

@ -31,7 +31,7 @@ instance (Arbitrary v, Ord v) => Arbitrary (S.Set v) where
arbitrary = S.fromList <$> arbitrary arbitrary = S.fromList <$> arbitrary
#endif #endif
{- Times before the epoch are excluded. -} {- Times before the epoch are excluded, and no fraction is included. -}
instance Arbitrary POSIXTime where instance Arbitrary POSIXTime where
arbitrary = fromInteger <$> nonNegative arbitrarySizedIntegral arbitrary = fromInteger <$> nonNegative arbitrarySizedIntegral

View file

@ -6,13 +6,18 @@ started checking the InodeCache to see if a file is modified.
This means that modifying a file, running `git add`, then modifying again This means that modifying a file, running `git add`, then modifying again
and `git add` within the same second won't stage the second version of the and `git add` within the same second won't stage the second version of the
file. file. (Although luckily it also compares file size.)
I think that optimisation needs to be disabled when inode caches will be I think that optimisation needs to be disabled when inode caches will be
compared weakly, because 2 seconds is just too long. This will mean slow compared weakly, because 2 seconds is just too long. This will mean slow
`git checkout` on FAT and also when a user moves a repo to a different `git checkout` on FAT and also when a user moves a repo to a different
filesystem. But I don't see a way to avoid it. filesystem. But I don't see a way to avoid it.
> Hmm, on second thought, that would mean every inAnnex on FAT
> would need to checksum the content. That's just too slow to be practical.
> `git annex fsck` will clean up if trusting the timestamps causes
> it to make a mistake on FAT.
Otherwise, the problem can be fixed by using modificationTimeHiRes. Otherwise, the problem can be fixed by using modificationTimeHiRes.
But! All existing InodeCaches would then appear to have changed. This would But! All existing InodeCaches would then appear to have changed. This would
@ -36,4 +41,4 @@ What would work, w/o breaking back-compat is
So the decimal part of the mtime becomes the 4th word and old So the decimal part of the mtime becomes the 4th word and old
versions of git-annex will ignore it. versions of git-annex will ignore it.
--[[Joey]] > [[fixed|done]] --[[Joey]]