high-res mtimes
Cache high-resolution mtimes for improved detection of modified files in v7 (and direct mode). Including on Windows. With back-compat support so old low-res mtimes won't break anything, and so the new information also won't break old versions of git-annex.
This commit is contained in:
parent
48af284872
commit
5ab0f48ffb
7 changed files with 109 additions and 31 deletions
|
@ -766,6 +766,10 @@ isUnmodified key f = go =<< geti
|
|||
|
||||
{- Cheap check if a file contains the unmodified content of the key,
|
||||
- only checking the InodeCache of the key.
|
||||
-
|
||||
- Note that, on systems not supporting high-resolution mtimes,
|
||||
- this may report a false positive when repeated edits are made to a file
|
||||
- within a small time window (eg 1 second).
|
||||
-}
|
||||
isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
|
||||
isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)
|
||||
|
|
|
@ -53,6 +53,8 @@ git-annex (7.20181025) UNRELEASED; urgency=medium
|
|||
(Reversion introduced in version 6.20180112)
|
||||
* migrate: Fix failure to migrate from URL keys.
|
||||
(Reversion introduced in version 6.20180926)
|
||||
* Cache high-resolution mtimes for improved detection of modified files
|
||||
in v7 (and direct mode).
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Sat, 13 Oct 2018 00:52:02 -0400
|
||||
|
||||
|
|
|
@ -18,8 +18,10 @@ import Annex.Content
|
|||
import Annex.Perms
|
||||
import qualified Annex.Queue
|
||||
import qualified Database.Keys
|
||||
|
||||
#if ! defined(mingw32_HOST_OS)
|
||||
import Utility.Touch
|
||||
import System.Posix.Files
|
||||
#endif
|
||||
|
||||
cmd :: Command
|
||||
|
@ -93,7 +95,7 @@ fixSymlink file link = do
|
|||
liftIO $ do
|
||||
#if ! defined(mingw32_HOST_OS)
|
||||
-- preserve mtime of symlink
|
||||
mtime <- catchMaybeIO $ modificationTimeHighRes
|
||||
mtime <- catchMaybeIO $ modificationTimeHiRes
|
||||
<$> getSymbolicLinkStatus file
|
||||
#endif
|
||||
createDirectoryIfMissing True (parentDir file)
|
||||
|
|
|
@ -96,8 +96,8 @@ clean file = do
|
|||
then B.length b `seq` return ()
|
||||
else liftIO $ hClose stdin
|
||||
|
||||
-- Optimization when the file is already annexed
|
||||
-- and is unmodified.
|
||||
-- Optimization for the case when the file is already
|
||||
-- annexed and is unmodified.
|
||||
case oldkey of
|
||||
Nothing -> doingest oldkey
|
||||
Just ko -> ifM (isUnmodifiedCheap ko file)
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
{- Caching a file's inode, size, and modification time
|
||||
- to see when it's changed.
|
||||
-
|
||||
- Copyright 2013-2014 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2013-2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- License: BSD-2-clause
|
||||
-}
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE TypeSynonymInstances #-}
|
||||
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
||||
|
||||
module Utility.InodeCache (
|
||||
|
@ -38,15 +39,20 @@ module Utility.InodeCache (
|
|||
) where
|
||||
|
||||
import Common
|
||||
import Utility.TimeStamp
|
||||
import Utility.QuickCheck
|
||||
|
||||
import System.PosixCompat.Types
|
||||
import Data.Time.Clock.POSIX
|
||||
import Utility.QuickCheck
|
||||
|
||||
#ifdef mingw32_HOST_OS
|
||||
import Data.Word (Word64)
|
||||
import System.Directory
|
||||
#else
|
||||
import System.Posix.Files
|
||||
#endif
|
||||
|
||||
data InodeCachePrim = InodeCachePrim FileID Integer EpochTime
|
||||
data InodeCachePrim = InodeCachePrim FileID FileSize MTime
|
||||
deriving (Show, Eq, Ord)
|
||||
|
||||
newtype InodeCache = InodeCache InodeCachePrim
|
||||
|
@ -68,12 +74,12 @@ compareStrong (InodeCache x) (InodeCache y) = x == y
|
|||
- The weak mtime comparison treats any mtimes that are within 2 seconds
|
||||
- of one-another as the same. This is because FAT has only a 2 second
|
||||
- resolution. When a FAT filesystem is used on Linux, higher resolution
|
||||
- timestamps are cached and used by Linux, but this is lost on unmount,
|
||||
- so after a remount, the timestamp can appear to have changed.
|
||||
- timestamps maybe be are cached and used by Linux, but they are lost
|
||||
- on unmount, so after a remount, the timestamp can appear to have changed.
|
||||
-}
|
||||
compareWeak :: InodeCache -> InodeCache -> Bool
|
||||
compareWeak (InodeCache (InodeCachePrim _ size1 mtime1)) (InodeCache (InodeCachePrim _ size2 mtime2)) =
|
||||
size1 == size2 && (abs (mtime1 - mtime2) < 2)
|
||||
size1 == size2 && (abs (lowResTime mtime1 - lowResTime mtime2) < 2)
|
||||
|
||||
compareBy :: InodeComparisonType -> InodeCache -> InodeCache -> Bool
|
||||
compareBy Strongly = compareStrong
|
||||
|
@ -92,23 +98,66 @@ inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey
|
|||
inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim
|
||||
|
||||
inodeCacheToMtime :: InodeCache -> POSIXTime
|
||||
inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = realToFrac mtime
|
||||
inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = highResTime mtime
|
||||
|
||||
{- For backwards compatability, support low-res mtime with no
|
||||
- fractional seconds. -}
|
||||
data MTime = MTimeLowRes EpochTime | MTimeHighRes POSIXTime
|
||||
deriving (Show, Ord)
|
||||
|
||||
{- A low-res time compares equal to any high-res time in the same second. -}
|
||||
instance Eq MTime where
|
||||
MTimeLowRes a == MTimeLowRes b = a == b
|
||||
MTimeHighRes a == MTimeHighRes b = a == b
|
||||
MTimeHighRes a == MTimeLowRes b = lowResTime a == b
|
||||
MTimeLowRes a == MTimeHighRes b = a == lowResTime b
|
||||
|
||||
class MultiResTime t where
|
||||
lowResTime :: t -> EpochTime
|
||||
highResTime :: t -> POSIXTime
|
||||
|
||||
instance MultiResTime EpochTime where
|
||||
lowResTime = id
|
||||
highResTime = realToFrac
|
||||
|
||||
instance MultiResTime POSIXTime where
|
||||
lowResTime = fromInteger . floor
|
||||
highResTime = id
|
||||
|
||||
instance MultiResTime MTime where
|
||||
lowResTime (MTimeLowRes t) = t
|
||||
lowResTime (MTimeHighRes t) = lowResTime t
|
||||
highResTime (MTimeLowRes t) = highResTime t
|
||||
highResTime (MTimeHighRes t) = t
|
||||
|
||||
showInodeCache :: InodeCache -> String
|
||||
showInodeCache (InodeCache (InodeCachePrim inode size mtime)) = unwords
|
||||
[ show inode
|
||||
, show size
|
||||
, show mtime
|
||||
]
|
||||
showInodeCache (InodeCache (InodeCachePrim inode size (MTimeHighRes mtime))) =
|
||||
let (t, d) = separate (== '.') (takeWhile (/= 's') (show mtime))
|
||||
in unwords
|
||||
[ show inode
|
||||
, show size
|
||||
, t
|
||||
, d
|
||||
]
|
||||
showInodeCache (InodeCache (InodeCachePrim inode size (MTimeLowRes mtime))) =
|
||||
unwords
|
||||
[ show inode
|
||||
, show size
|
||||
, show mtime
|
||||
]
|
||||
|
||||
readInodeCache :: String -> Maybe InodeCache
|
||||
readInodeCache s = case words s of
|
||||
(inode:size:mtime:_) ->
|
||||
let prim = InodeCachePrim
|
||||
<$> readish inode
|
||||
<*> readish size
|
||||
<*> readish mtime
|
||||
in InodeCache <$> prim
|
||||
(inode:size:mtime:[]) -> do
|
||||
i <- readish inode
|
||||
sz <- readish size
|
||||
t <- readish mtime
|
||||
return $ InodeCache $ InodeCachePrim i sz (MTimeLowRes t)
|
||||
(inode:size:mtime:mtimedecimal:_) -> do
|
||||
i <- readish inode
|
||||
sz <- readish size
|
||||
t <- parsePOSIXTime' mtime mtimedecimal
|
||||
return $ InodeCache $ InodeCachePrim i sz (MTimeHighRes t)
|
||||
_ -> Nothing
|
||||
|
||||
genInodeCache :: FilePath -> TSDelta -> IO (Maybe InodeCache)
|
||||
|
@ -120,10 +169,12 @@ toInodeCache (TSDelta getdelta) f s
|
|||
| isRegularFile s = do
|
||||
delta <- getdelta
|
||||
sz <- getFileSize' f s
|
||||
return $ Just $ InodeCache $ InodeCachePrim
|
||||
(fileID s)
|
||||
sz
|
||||
(modificationTime s + delta)
|
||||
#ifdef mingw32_HOST_OS
|
||||
mtime <- MTimeHighRes . utcTimeToPOSIXSeconds <$> getModificationTime f
|
||||
#else
|
||||
let mtime = (MTimeHighRes (modificationTimeHiRes s + highResTime delta))
|
||||
#endif
|
||||
return $ Just $ InodeCache $ InodeCachePrim (fileID s) sz mtime
|
||||
| otherwise = pure Nothing
|
||||
|
||||
{- Some filesystem get new random inodes each time they are mounted.
|
||||
|
@ -195,7 +246,7 @@ checkSentinalFile s = do
|
|||
mnew <- gennewcache
|
||||
return $ case mnew of
|
||||
Just (InodeCache (InodeCachePrim _ _ currmtime)) ->
|
||||
oldmtime - currmtime
|
||||
lowResTime oldmtime - lowResTime currmtime
|
||||
Nothing -> 0
|
||||
#else
|
||||
unchanged = oldinode == newinode && oldsize == newsize && oldmtime == newmtime
|
||||
|
@ -211,10 +262,24 @@ instance Arbitrary InodeCache where
|
|||
let prim = InodeCachePrim
|
||||
<$> arbitrary
|
||||
<*> arbitrary
|
||||
-- timestamp cannot be negative
|
||||
<*> (abs . fromInteger <$> arbitrary)
|
||||
<*> arbitrary
|
||||
in InodeCache <$> prim
|
||||
|
||||
instance Arbitrary MTime where
|
||||
arbitrary = frequency
|
||||
-- timestamp is not usually negative
|
||||
[ (50, MTimeLowRes <$> (abs . fromInteger <$> arbitrary))
|
||||
, (50, MTimeHighRes <$> (abs <$> arbposixtime))
|
||||
]
|
||||
where
|
||||
-- include fractional part, which the usual instance does not
|
||||
arbposixtime = do
|
||||
t <- arbitrary
|
||||
f <- arbitrary
|
||||
return $ if f == 0
|
||||
then t
|
||||
else t + recip f
|
||||
|
||||
#ifdef mingw32_HOST_OS
|
||||
instance Arbitrary FileID where
|
||||
arbitrary = fromIntegral <$> (arbitrary :: Gen Word64)
|
||||
|
|
|
@ -31,7 +31,7 @@ instance (Arbitrary v, Ord v) => Arbitrary (S.Set v) where
|
|||
arbitrary = S.fromList <$> arbitrary
|
||||
#endif
|
||||
|
||||
{- Times before the epoch are excluded. -}
|
||||
{- Times before the epoch are excluded, and no fraction is included. -}
|
||||
instance Arbitrary POSIXTime where
|
||||
arbitrary = fromInteger <$> nonNegative arbitrarySizedIntegral
|
||||
|
||||
|
|
|
@ -6,13 +6,18 @@ started checking the InodeCache to see if a file is modified.
|
|||
|
||||
This means that modifying a file, running `git add`, then modifying again
|
||||
and `git add` within the same second won't stage the second version of the
|
||||
file.
|
||||
file. (Although luckily it also compares file size.)
|
||||
|
||||
I think that optimisation needs to be disabled when inode caches will be
|
||||
compared weakly, because 2 seconds is just too long. This will mean slow
|
||||
`git checkout` on FAT and also when a user moves a repo to a different
|
||||
filesystem. But I don't see a way to avoid it.
|
||||
|
||||
> Hmm, on second thought, that would mean every inAnnex on FAT
|
||||
> would need to checksum the content. That's just too slow to be practical.
|
||||
> `git annex fsck` will clean up if trusting the timestamps causes
|
||||
> it to make a mistake on FAT.
|
||||
|
||||
Otherwise, the problem can be fixed by using modificationTimeHiRes.
|
||||
|
||||
But! All existing InodeCaches would then appear to have changed. This would
|
||||
|
@ -36,4 +41,4 @@ What would work, w/o breaking back-compat is
|
|||
So the decimal part of the mtime becomes the 4th word and old
|
||||
versions of git-annex will ignore it.
|
||||
|
||||
--[[Joey]]
|
||||
> [[fixed|done]] --[[Joey]]
|
||||
|
|
Loading…
Reference in a new issue