high-res mtimes
Cache high-resolution mtimes for improved detection of modified files in v7 (and direct mode). Including on Windows. With back-compat support so old low-res mtimes won't break anything, and so the new information also won't break old versions of git-annex.
This commit is contained in:
parent
48af284872
commit
5ab0f48ffb
7 changed files with 109 additions and 31 deletions
|
@ -766,6 +766,10 @@ isUnmodified key f = go =<< geti
|
||||||
|
|
||||||
{- Cheap check if a file contains the unmodified content of the key,
|
{- Cheap check if a file contains the unmodified content of the key,
|
||||||
- only checking the InodeCache of the key.
|
- only checking the InodeCache of the key.
|
||||||
|
-
|
||||||
|
- Note that, on systems not supporting high-resolution mtimes,
|
||||||
|
- this may report a false positive when repeated edits are made to a file
|
||||||
|
- within a small time window (eg 1 second).
|
||||||
-}
|
-}
|
||||||
isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
|
isUnmodifiedCheap :: Key -> FilePath -> Annex Bool
|
||||||
isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)
|
isUnmodifiedCheap key f = maybe (return False) (isUnmodifiedCheap' key)
|
||||||
|
|
|
@ -53,6 +53,8 @@ git-annex (7.20181025) UNRELEASED; urgency=medium
|
||||||
(Reversion introduced in version 6.20180112)
|
(Reversion introduced in version 6.20180112)
|
||||||
* migrate: Fix failure to migrate from URL keys.
|
* migrate: Fix failure to migrate from URL keys.
|
||||||
(Reversion introduced in version 6.20180926)
|
(Reversion introduced in version 6.20180926)
|
||||||
|
* Cache high-resolution mtimes for improved detection of modified files
|
||||||
|
in v7 (and direct mode).
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Sat, 13 Oct 2018 00:52:02 -0400
|
-- Joey Hess <id@joeyh.name> Sat, 13 Oct 2018 00:52:02 -0400
|
||||||
|
|
||||||
|
|
|
@ -18,8 +18,10 @@ import Annex.Content
|
||||||
import Annex.Perms
|
import Annex.Perms
|
||||||
import qualified Annex.Queue
|
import qualified Annex.Queue
|
||||||
import qualified Database.Keys
|
import qualified Database.Keys
|
||||||
|
|
||||||
#if ! defined(mingw32_HOST_OS)
|
#if ! defined(mingw32_HOST_OS)
|
||||||
import Utility.Touch
|
import Utility.Touch
|
||||||
|
import System.Posix.Files
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
cmd :: Command
|
cmd :: Command
|
||||||
|
@ -93,7 +95,7 @@ fixSymlink file link = do
|
||||||
liftIO $ do
|
liftIO $ do
|
||||||
#if ! defined(mingw32_HOST_OS)
|
#if ! defined(mingw32_HOST_OS)
|
||||||
-- preserve mtime of symlink
|
-- preserve mtime of symlink
|
||||||
mtime <- catchMaybeIO $ modificationTimeHighRes
|
mtime <- catchMaybeIO $ modificationTimeHiRes
|
||||||
<$> getSymbolicLinkStatus file
|
<$> getSymbolicLinkStatus file
|
||||||
#endif
|
#endif
|
||||||
createDirectoryIfMissing True (parentDir file)
|
createDirectoryIfMissing True (parentDir file)
|
||||||
|
|
|
@ -96,8 +96,8 @@ clean file = do
|
||||||
then B.length b `seq` return ()
|
then B.length b `seq` return ()
|
||||||
else liftIO $ hClose stdin
|
else liftIO $ hClose stdin
|
||||||
|
|
||||||
-- Optimization when the file is already annexed
|
-- Optimization for the case when the file is already
|
||||||
-- and is unmodified.
|
-- annexed and is unmodified.
|
||||||
case oldkey of
|
case oldkey of
|
||||||
Nothing -> doingest oldkey
|
Nothing -> doingest oldkey
|
||||||
Just ko -> ifM (isUnmodifiedCheap ko file)
|
Just ko -> ifM (isUnmodifiedCheap ko file)
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
{- Caching a file's inode, size, and modification time
|
{- Caching a file's inode, size, and modification time
|
||||||
- to see when it's changed.
|
- to see when it's changed.
|
||||||
-
|
-
|
||||||
- Copyright 2013-2014 Joey Hess <id@joeyh.name>
|
- Copyright 2013-2018 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- License: BSD-2-clause
|
- License: BSD-2-clause
|
||||||
-}
|
-}
|
||||||
|
|
||||||
{-# LANGUAGE CPP #-}
|
{-# LANGUAGE CPP #-}
|
||||||
|
{-# LANGUAGE TypeSynonymInstances #-}
|
||||||
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
||||||
|
|
||||||
module Utility.InodeCache (
|
module Utility.InodeCache (
|
||||||
|
@ -38,15 +39,20 @@ module Utility.InodeCache (
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Common
|
import Common
|
||||||
|
import Utility.TimeStamp
|
||||||
|
import Utility.QuickCheck
|
||||||
|
|
||||||
import System.PosixCompat.Types
|
import System.PosixCompat.Types
|
||||||
import Data.Time.Clock.POSIX
|
import Data.Time.Clock.POSIX
|
||||||
import Utility.QuickCheck
|
|
||||||
|
|
||||||
#ifdef mingw32_HOST_OS
|
#ifdef mingw32_HOST_OS
|
||||||
import Data.Word (Word64)
|
import Data.Word (Word64)
|
||||||
|
import System.Directory
|
||||||
|
#else
|
||||||
|
import System.Posix.Files
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
data InodeCachePrim = InodeCachePrim FileID Integer EpochTime
|
data InodeCachePrim = InodeCachePrim FileID FileSize MTime
|
||||||
deriving (Show, Eq, Ord)
|
deriving (Show, Eq, Ord)
|
||||||
|
|
||||||
newtype InodeCache = InodeCache InodeCachePrim
|
newtype InodeCache = InodeCache InodeCachePrim
|
||||||
|
@ -68,12 +74,12 @@ compareStrong (InodeCache x) (InodeCache y) = x == y
|
||||||
- The weak mtime comparison treats any mtimes that are within 2 seconds
|
- The weak mtime comparison treats any mtimes that are within 2 seconds
|
||||||
- of one-another as the same. This is because FAT has only a 2 second
|
- of one-another as the same. This is because FAT has only a 2 second
|
||||||
- resolution. When a FAT filesystem is used on Linux, higher resolution
|
- resolution. When a FAT filesystem is used on Linux, higher resolution
|
||||||
- timestamps are cached and used by Linux, but this is lost on unmount,
|
- timestamps maybe be are cached and used by Linux, but they are lost
|
||||||
- so after a remount, the timestamp can appear to have changed.
|
- on unmount, so after a remount, the timestamp can appear to have changed.
|
||||||
-}
|
-}
|
||||||
compareWeak :: InodeCache -> InodeCache -> Bool
|
compareWeak :: InodeCache -> InodeCache -> Bool
|
||||||
compareWeak (InodeCache (InodeCachePrim _ size1 mtime1)) (InodeCache (InodeCachePrim _ size2 mtime2)) =
|
compareWeak (InodeCache (InodeCachePrim _ size1 mtime1)) (InodeCache (InodeCachePrim _ size2 mtime2)) =
|
||||||
size1 == size2 && (abs (mtime1 - mtime2) < 2)
|
size1 == size2 && (abs (lowResTime mtime1 - lowResTime mtime2) < 2)
|
||||||
|
|
||||||
compareBy :: InodeComparisonType -> InodeCache -> InodeCache -> Bool
|
compareBy :: InodeComparisonType -> InodeCache -> InodeCache -> Bool
|
||||||
compareBy Strongly = compareStrong
|
compareBy Strongly = compareStrong
|
||||||
|
@ -92,23 +98,66 @@ inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey
|
||||||
inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim
|
inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim
|
||||||
|
|
||||||
inodeCacheToMtime :: InodeCache -> POSIXTime
|
inodeCacheToMtime :: InodeCache -> POSIXTime
|
||||||
inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = realToFrac mtime
|
inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = highResTime mtime
|
||||||
|
|
||||||
|
{- For backwards compatability, support low-res mtime with no
|
||||||
|
- fractional seconds. -}
|
||||||
|
data MTime = MTimeLowRes EpochTime | MTimeHighRes POSIXTime
|
||||||
|
deriving (Show, Ord)
|
||||||
|
|
||||||
|
{- A low-res time compares equal to any high-res time in the same second. -}
|
||||||
|
instance Eq MTime where
|
||||||
|
MTimeLowRes a == MTimeLowRes b = a == b
|
||||||
|
MTimeHighRes a == MTimeHighRes b = a == b
|
||||||
|
MTimeHighRes a == MTimeLowRes b = lowResTime a == b
|
||||||
|
MTimeLowRes a == MTimeHighRes b = a == lowResTime b
|
||||||
|
|
||||||
|
class MultiResTime t where
|
||||||
|
lowResTime :: t -> EpochTime
|
||||||
|
highResTime :: t -> POSIXTime
|
||||||
|
|
||||||
|
instance MultiResTime EpochTime where
|
||||||
|
lowResTime = id
|
||||||
|
highResTime = realToFrac
|
||||||
|
|
||||||
|
instance MultiResTime POSIXTime where
|
||||||
|
lowResTime = fromInteger . floor
|
||||||
|
highResTime = id
|
||||||
|
|
||||||
|
instance MultiResTime MTime where
|
||||||
|
lowResTime (MTimeLowRes t) = t
|
||||||
|
lowResTime (MTimeHighRes t) = lowResTime t
|
||||||
|
highResTime (MTimeLowRes t) = highResTime t
|
||||||
|
highResTime (MTimeHighRes t) = t
|
||||||
|
|
||||||
showInodeCache :: InodeCache -> String
|
showInodeCache :: InodeCache -> String
|
||||||
showInodeCache (InodeCache (InodeCachePrim inode size mtime)) = unwords
|
showInodeCache (InodeCache (InodeCachePrim inode size (MTimeHighRes mtime))) =
|
||||||
[ show inode
|
let (t, d) = separate (== '.') (takeWhile (/= 's') (show mtime))
|
||||||
, show size
|
in unwords
|
||||||
, show mtime
|
[ show inode
|
||||||
]
|
, show size
|
||||||
|
, t
|
||||||
|
, d
|
||||||
|
]
|
||||||
|
showInodeCache (InodeCache (InodeCachePrim inode size (MTimeLowRes mtime))) =
|
||||||
|
unwords
|
||||||
|
[ show inode
|
||||||
|
, show size
|
||||||
|
, show mtime
|
||||||
|
]
|
||||||
|
|
||||||
readInodeCache :: String -> Maybe InodeCache
|
readInodeCache :: String -> Maybe InodeCache
|
||||||
readInodeCache s = case words s of
|
readInodeCache s = case words s of
|
||||||
(inode:size:mtime:_) ->
|
(inode:size:mtime:[]) -> do
|
||||||
let prim = InodeCachePrim
|
i <- readish inode
|
||||||
<$> readish inode
|
sz <- readish size
|
||||||
<*> readish size
|
t <- readish mtime
|
||||||
<*> readish mtime
|
return $ InodeCache $ InodeCachePrim i sz (MTimeLowRes t)
|
||||||
in InodeCache <$> prim
|
(inode:size:mtime:mtimedecimal:_) -> do
|
||||||
|
i <- readish inode
|
||||||
|
sz <- readish size
|
||||||
|
t <- parsePOSIXTime' mtime mtimedecimal
|
||||||
|
return $ InodeCache $ InodeCachePrim i sz (MTimeHighRes t)
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
|
|
||||||
genInodeCache :: FilePath -> TSDelta -> IO (Maybe InodeCache)
|
genInodeCache :: FilePath -> TSDelta -> IO (Maybe InodeCache)
|
||||||
|
@ -120,10 +169,12 @@ toInodeCache (TSDelta getdelta) f s
|
||||||
| isRegularFile s = do
|
| isRegularFile s = do
|
||||||
delta <- getdelta
|
delta <- getdelta
|
||||||
sz <- getFileSize' f s
|
sz <- getFileSize' f s
|
||||||
return $ Just $ InodeCache $ InodeCachePrim
|
#ifdef mingw32_HOST_OS
|
||||||
(fileID s)
|
mtime <- MTimeHighRes . utcTimeToPOSIXSeconds <$> getModificationTime f
|
||||||
sz
|
#else
|
||||||
(modificationTime s + delta)
|
let mtime = (MTimeHighRes (modificationTimeHiRes s + highResTime delta))
|
||||||
|
#endif
|
||||||
|
return $ Just $ InodeCache $ InodeCachePrim (fileID s) sz mtime
|
||||||
| otherwise = pure Nothing
|
| otherwise = pure Nothing
|
||||||
|
|
||||||
{- Some filesystem get new random inodes each time they are mounted.
|
{- Some filesystem get new random inodes each time they are mounted.
|
||||||
|
@ -195,7 +246,7 @@ checkSentinalFile s = do
|
||||||
mnew <- gennewcache
|
mnew <- gennewcache
|
||||||
return $ case mnew of
|
return $ case mnew of
|
||||||
Just (InodeCache (InodeCachePrim _ _ currmtime)) ->
|
Just (InodeCache (InodeCachePrim _ _ currmtime)) ->
|
||||||
oldmtime - currmtime
|
lowResTime oldmtime - lowResTime currmtime
|
||||||
Nothing -> 0
|
Nothing -> 0
|
||||||
#else
|
#else
|
||||||
unchanged = oldinode == newinode && oldsize == newsize && oldmtime == newmtime
|
unchanged = oldinode == newinode && oldsize == newsize && oldmtime == newmtime
|
||||||
|
@ -211,10 +262,24 @@ instance Arbitrary InodeCache where
|
||||||
let prim = InodeCachePrim
|
let prim = InodeCachePrim
|
||||||
<$> arbitrary
|
<$> arbitrary
|
||||||
<*> arbitrary
|
<*> arbitrary
|
||||||
-- timestamp cannot be negative
|
<*> arbitrary
|
||||||
<*> (abs . fromInteger <$> arbitrary)
|
|
||||||
in InodeCache <$> prim
|
in InodeCache <$> prim
|
||||||
|
|
||||||
|
instance Arbitrary MTime where
|
||||||
|
arbitrary = frequency
|
||||||
|
-- timestamp is not usually negative
|
||||||
|
[ (50, MTimeLowRes <$> (abs . fromInteger <$> arbitrary))
|
||||||
|
, (50, MTimeHighRes <$> (abs <$> arbposixtime))
|
||||||
|
]
|
||||||
|
where
|
||||||
|
-- include fractional part, which the usual instance does not
|
||||||
|
arbposixtime = do
|
||||||
|
t <- arbitrary
|
||||||
|
f <- arbitrary
|
||||||
|
return $ if f == 0
|
||||||
|
then t
|
||||||
|
else t + recip f
|
||||||
|
|
||||||
#ifdef mingw32_HOST_OS
|
#ifdef mingw32_HOST_OS
|
||||||
instance Arbitrary FileID where
|
instance Arbitrary FileID where
|
||||||
arbitrary = fromIntegral <$> (arbitrary :: Gen Word64)
|
arbitrary = fromIntegral <$> (arbitrary :: Gen Word64)
|
||||||
|
|
|
@ -31,7 +31,7 @@ instance (Arbitrary v, Ord v) => Arbitrary (S.Set v) where
|
||||||
arbitrary = S.fromList <$> arbitrary
|
arbitrary = S.fromList <$> arbitrary
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
{- Times before the epoch are excluded. -}
|
{- Times before the epoch are excluded, and no fraction is included. -}
|
||||||
instance Arbitrary POSIXTime where
|
instance Arbitrary POSIXTime where
|
||||||
arbitrary = fromInteger <$> nonNegative arbitrarySizedIntegral
|
arbitrary = fromInteger <$> nonNegative arbitrarySizedIntegral
|
||||||
|
|
||||||
|
|
|
@ -6,13 +6,18 @@ started checking the InodeCache to see if a file is modified.
|
||||||
|
|
||||||
This means that modifying a file, running `git add`, then modifying again
|
This means that modifying a file, running `git add`, then modifying again
|
||||||
and `git add` within the same second won't stage the second version of the
|
and `git add` within the same second won't stage the second version of the
|
||||||
file.
|
file. (Although luckily it also compares file size.)
|
||||||
|
|
||||||
I think that optimisation needs to be disabled when inode caches will be
|
I think that optimisation needs to be disabled when inode caches will be
|
||||||
compared weakly, because 2 seconds is just too long. This will mean slow
|
compared weakly, because 2 seconds is just too long. This will mean slow
|
||||||
`git checkout` on FAT and also when a user moves a repo to a different
|
`git checkout` on FAT and also when a user moves a repo to a different
|
||||||
filesystem. But I don't see a way to avoid it.
|
filesystem. But I don't see a way to avoid it.
|
||||||
|
|
||||||
|
> Hmm, on second thought, that would mean every inAnnex on FAT
|
||||||
|
> would need to checksum the content. That's just too slow to be practical.
|
||||||
|
> `git annex fsck` will clean up if trusting the timestamps causes
|
||||||
|
> it to make a mistake on FAT.
|
||||||
|
|
||||||
Otherwise, the problem can be fixed by using modificationTimeHiRes.
|
Otherwise, the problem can be fixed by using modificationTimeHiRes.
|
||||||
|
|
||||||
But! All existing InodeCaches would then appear to have changed. This would
|
But! All existing InodeCaches would then appear to have changed. This would
|
||||||
|
@ -36,4 +41,4 @@ What would work, w/o breaking back-compat is
|
||||||
So the decimal part of the mtime becomes the 4th word and old
|
So the decimal part of the mtime becomes the 4th word and old
|
||||||
versions of git-annex will ignore it.
|
versions of git-annex will ignore it.
|
||||||
|
|
||||||
--[[Joey]]
|
> [[fixed|done]] --[[Joey]]
|
||||||
|
|
Loading…
Reference in a new issue