high-res mtimes

Cache high-resolution mtimes for improved detection of modified files in v7
(and direct mode).

Including on Windows.

With back-compat support so old low-res mtimes won't break anything, and
so the new information also won't break old versions of git-annex.
This commit is contained in:
Joey Hess 2018-10-30 00:40:17 -04:00
parent 48af284872
commit 5ab0f48ffb
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 109 additions and 31 deletions

View file

@ -766,6 +766,10 @@ isUnmodified key f = go =<< geti
{- Cheap check if a file contains the unmodified content of the key,
- only checking the InodeCache of the key.
-
- Note that, on systems not supporting high-resolution mtimes,
- this may report a false positive when repeated edits are made to a file
- within a small time window (eg 1 second).
-}
isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)

View file

@ -53,6 +53,8 @@ git-annex (7.20181025) UNRELEASED; urgency=medium
(Reversion introduced in version 6.20180112)
* migrate: Fix failure to migrate from URL keys.
(Reversion introduced in version 6.20180926)
* Cache high-resolution mtimes for improved detection of modified files
in v7 (and direct mode).
-- Joey Hess <id@joeyh.name> Sat, 13 Oct 2018 00:52:02 -0400

View file

@ -18,8 +18,10 @@ import Annex.Content
import Annex.Perms
import qualified Annex.Queue
import qualified Database.Keys
#if ! defined(mingw32_HOST_OS)
import Utility.Touch
import System.Posix.Files
#endif
cmd :: Command
@ -93,7 +95,7 @@ fixSymlink file link = do
liftIO $ do
#if ! defined(mingw32_HOST_OS)
-- preserve mtime of symlink
mtime <- catchMaybeIO $ modificationTimeHighRes
mtime <- catchMaybeIO $ modificationTimeHiRes
<$> getSymbolicLinkStatus file
#endif
createDirectoryIfMissing True (parentDir file)

View file

@ -96,8 +96,8 @@ clean file = do
then B.length b `seq` return ()
else liftIO $ hClose stdin
-- Optimization when the file is already annexed
-- and is unmodified.
-- Optimization for the case when the file is already
-- annexed and is unmodified.
case oldkey of
Nothing -> doingest oldkey
Just ko -> ifM (isUnmodifiedCheap ko file)

View file

@ -1,12 +1,13 @@
{- Caching a file's inode, size, and modification time
- to see when it's changed.
-
- Copyright 2013-2014 Joey Hess <id@joeyh.name>
- Copyright 2013-2018 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Utility.InodeCache (
@ -38,15 +39,20 @@ module Utility.InodeCache (
) where
import Common
import Utility.TimeStamp
import Utility.QuickCheck
import System.PosixCompat.Types
import Data.Time.Clock.POSIX
import Utility.QuickCheck
#ifdef mingw32_HOST_OS
import Data.Word (Word64)
import System.Directory
#else
import System.Posix.Files
#endif
data InodeCachePrim = InodeCachePrim FileID Integer EpochTime
data InodeCachePrim = InodeCachePrim FileID FileSize MTime
deriving (Show, Eq, Ord)
newtype InodeCache = InodeCache InodeCachePrim
@ -68,12 +74,12 @@ compareStrong (InodeCache x) (InodeCache y) = x == y
- The weak mtime comparison treats any mtimes that are within 2 seconds
- of one-another as the same. This is because FAT has only a 2 second
- resolution. When a FAT filesystem is used on Linux, higher resolution
- timestamps are cached and used by Linux, but this is lost on unmount,
- so after a remount, the timestamp can appear to have changed.
- timestamps maybe be are cached and used by Linux, but they are lost
- on unmount, so after a remount, the timestamp can appear to have changed.
-}
compareWeak :: InodeCache -> InodeCache -> Bool
compareWeak (InodeCache (InodeCachePrim _ size1 mtime1)) (InodeCache (InodeCachePrim _ size2 mtime2)) =
size1 == size2 && (abs (mtime1 - mtime2) < 2)
size1 == size2 && (abs (lowResTime mtime1 - lowResTime mtime2) < 2)
compareBy :: InodeComparisonType -> InodeCache -> InodeCache -> Bool
compareBy Strongly = compareStrong
@ -92,10 +98,49 @@ inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey
inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim
inodeCacheToMtime :: InodeCache -> POSIXTime
inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = realToFrac mtime
inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = highResTime mtime
{- For backwards compatability, support low-res mtime with no
- fractional seconds. -}
data MTime = MTimeLowRes EpochTime | MTimeHighRes POSIXTime
deriving (Show, Ord)
{- A low-res time compares equal to any high-res time in the same second. -}
instance Eq MTime where
MTimeLowRes a == MTimeLowRes b = a == b
MTimeHighRes a == MTimeHighRes b = a == b
MTimeHighRes a == MTimeLowRes b = lowResTime a == b
MTimeLowRes a == MTimeHighRes b = a == lowResTime b
class MultiResTime t where
lowResTime :: t -> EpochTime
highResTime :: t -> POSIXTime
instance MultiResTime EpochTime where
lowResTime = id
highResTime = realToFrac
instance MultiResTime POSIXTime where
lowResTime = fromInteger . floor
highResTime = id
instance MultiResTime MTime where
lowResTime (MTimeLowRes t) = t
lowResTime (MTimeHighRes t) = lowResTime t
highResTime (MTimeLowRes t) = highResTime t
highResTime (MTimeHighRes t) = t
showInodeCache :: InodeCache -> String
showInodeCache (InodeCache (InodeCachePrim inode size mtime)) = unwords
showInodeCache (InodeCache (InodeCachePrim inode size (MTimeHighRes mtime))) =
let (t, d) = separate (== '.') (takeWhile (/= 's') (show mtime))
in unwords
[ show inode
, show size
, t
, d
]
showInodeCache (InodeCache (InodeCachePrim inode size (MTimeLowRes mtime))) =
unwords
[ show inode
, show size
, show mtime
@ -103,12 +148,16 @@ showInodeCache (InodeCache (InodeCachePrim inode size mtime)) = unwords
readInodeCache :: String -> Maybe InodeCache
readInodeCache s = case words s of
(inode:size:mtime:_) ->
let prim = InodeCachePrim
<$> readish inode
<*> readish size
<*> readish mtime
in InodeCache <$> prim
(inode:size:mtime:[]) -> do
i <- readish inode
sz <- readish size
t <- readish mtime
return $ InodeCache $ InodeCachePrim i sz (MTimeLowRes t)
(inode:size:mtime:mtimedecimal:_) -> do
i <- readish inode
sz <- readish size
t <- parsePOSIXTime' mtime mtimedecimal
return $ InodeCache $ InodeCachePrim i sz (MTimeHighRes t)
_ -> Nothing
genInodeCache :: FilePath -> TSDelta -> IO (Maybe InodeCache)
@ -120,10 +169,12 @@ toInodeCache (TSDelta getdelta) f s
| isRegularFile s = do
delta <- getdelta
sz <- getFileSize' f s
return $ Just $ InodeCache $ InodeCachePrim
(fileID s)
sz
(modificationTime s + delta)
#ifdef mingw32_HOST_OS
mtime <- MTimeHighRes . utcTimeToPOSIXSeconds <$> getModificationTime f
#else
let mtime = (MTimeHighRes (modificationTimeHiRes s + highResTime delta))
#endif
return $ Just $ InodeCache $ InodeCachePrim (fileID s) sz mtime
| otherwise = pure Nothing
{- Some filesystem get new random inodes each time they are mounted.
@ -195,7 +246,7 @@ checkSentinalFile s = do
mnew <- gennewcache
return $ case mnew of
Just (InodeCache (InodeCachePrim _ _ currmtime)) ->
oldmtime - currmtime
lowResTime oldmtime - lowResTime currmtime
Nothing -> 0
#else
unchanged = oldinode == newinode && oldsize == newsize && oldmtime == newmtime
@ -211,10 +262,24 @@ instance Arbitrary InodeCache where
let prim = InodeCachePrim
<$> arbitrary
<*> arbitrary
-- timestamp cannot be negative
<*> (abs . fromInteger <$> arbitrary)
<*> arbitrary
in InodeCache <$> prim
instance Arbitrary MTime where
arbitrary = frequency
-- timestamp is not usually negative
[ (50, MTimeLowRes <$> (abs . fromInteger <$> arbitrary))
, (50, MTimeHighRes <$> (abs <$> arbposixtime))
]
where
-- include fractional part, which the usual instance does not
arbposixtime = do
t <- arbitrary
f <- arbitrary
return $ if f == 0
then t
else t + recip f
#ifdef mingw32_HOST_OS
instance Arbitrary FileID where
arbitrary = fromIntegral <$> (arbitrary :: Gen Word64)

View file

@ -31,7 +31,7 @@ instance (Arbitrary v, Ord v) => Arbitrary (S.Set v) where
arbitrary = S.fromList <$> arbitrary
#endif
{- Times before the epoch are excluded. -}
{- Times before the epoch are excluded, and no fraction is included. -}
instance Arbitrary POSIXTime where
arbitrary = fromInteger <$> nonNegative arbitrarySizedIntegral

View file

@ -6,13 +6,18 @@ started checking the InodeCache to see if a file is modified.
This means that modifying a file, running `git add`, then modifying again
and `git add` within the same second won't stage the second version of the
file.
file. (Although luckily it also compares file size.)
I think that optimisation needs to be disabled when inode caches will be
compared weakly, because 2 seconds is just too long. This will mean slow
`git checkout` on FAT and also when a user moves a repo to a different
filesystem. But I don't see a way to avoid it.
> Hmm, on second thought, that would mean every inAnnex on FAT
> would need to checksum the content. That's just too slow to be practical.
> `git annex fsck` will clean up if trusting the timestamps causes
> it to make a mistake on FAT.
Otherwise, the problem can be fixed by using modificationTimeHiRes.
But! All existing InodeCaches would then appear to have changed. This would
@ -36,4 +41,4 @@ What would work, w/o breaking back-compat is
So the decimal part of the mtime becomes the 4th word and old
versions of git-annex will ignore it.
--[[Joey]]
> [[fixed|done]] --[[Joey]]