detect renames during commit, and .. um, do nothing special because it's lunch time

But I'm well set up to fast-track direct mode adds for renames now.
This commit is contained in:
Joey Hess 2013-03-11 12:56:47 -04:00
parent 82b718e830
commit 61c5e8736c
4 changed files with 103 additions and 20 deletions

View file

@ -14,6 +14,7 @@ module Annex.Content.Direct (
updateInodeCache, updateInodeCache,
writeInodeCache, writeInodeCache,
compareInodeCaches, compareInodeCaches,
compareInodeCachesWith,
sameInodeCache, sameInodeCache,
sameFileStatus, sameFileStatus,
removeInodeCache, removeInodeCache,
@ -147,12 +148,15 @@ sameFileStatus key status = do
{- If the inodes have changed, only the size and mtime are compared. -} {- If the inodes have changed, only the size and mtime are compared. -}
compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool compareInodeCaches :: InodeCache -> InodeCache -> Annex Bool
compareInodeCaches x y compareInodeCaches x y
| x `compareStrong` y = return True | compareStrong x y = return True
| otherwise = ifM inodesChanged | otherwise = ifM inodesChanged
( return $ compareWeak x y ( return $ compareWeak x y
, return False , return False
) )
compareInodeCachesWith :: Annex InodeComparisonType
compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
{- Some filesystems get new inodes each time they are mounted. {- Some filesystems get new inodes each time they are mounted.
- In order to work on such a filesystem, a sentinal file is used to detect - In order to work on such a filesystem, a sentinal file is used to detect
- when the inodes have changed. - when the inodes have changed.

View file

@ -32,11 +32,15 @@ import Config
import Annex.Exception import Annex.Exception
import Annex.Content import Annex.Content
import Annex.Link import Annex.Link
import Annex.CatFile
import qualified Annex import qualified Annex
import Utility.InodeCache
import Annex.Content.Direct
import Data.Time.Clock import Data.Time.Clock
import Data.Tuple.Utils import Data.Tuple.Utils
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.Map as M
import Data.Either import Data.Either
import Control.Concurrent import Control.Concurrent
@ -90,9 +94,13 @@ waitChangeTime a = runEvery (Seconds 1) <~> do
{- Did we perhaps only get one of the AddChange and RmChange pair {- Did we perhaps only get one of the AddChange and RmChange pair
- that make up a rename? -} - that make up a rename? -}
lonelychange [(PendingAddChange _ _)] = True lonelychange [(PendingAddChange _ _)] = True
lonelychange [(Change { changeInfo = i })] | i == RmChange = True lonelychange [c] | isRmChange c = True
lonelychange _ = False lonelychange _ = False
isRmChange :: Change -> Bool
isRmChange (Change { changeInfo = i }) | i == RmChange = True
isRmChange _ = False
{- An amount of time that is hopefully imperceptably short for humans, {- An amount of time that is hopefully imperceptably short for humans,
- while long enough for a computer to get some work done. - while long enough for a computer to get some work done.
- Note that 0.001 is a little too short for rename change batching to - Note that 0.001 is a little too short for rename change batching to
@ -200,7 +208,9 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do
refillChanges postponed refillChanges postponed
returnWhen (null toadd) $ do returnWhen (null toadd) $ do
added <- catMaybes <$> forM toadd add added <- catMaybes <$> if direct
then adddirect toadd
else forM toadd add
if DirWatcher.eventsCoalesce || null added || direct if DirWatcher.eventsCoalesce || null added || direct
then return $ added ++ otherchanges then return $ added ++ otherchanges
else do else do
@ -238,6 +248,45 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do
ret _ = (True, Nothing) ret _ = (True, Nothing)
add _ = return Nothing add _ = return Nothing
{- In direct mode, avoid overhead of re-injesting a renamed
- file, by examining the other Changes to see if a removed
- file has the same InodeCache as the new file. If so,
- we can just update bookkeeping, and stage the file in git.
-}
adddirect :: [Change] -> Assistant [Maybe Change]
adddirect toadd = do
ct <- liftAnnex compareInodeCachesWith
m <- liftAnnex $ removedKeysMap ct cs
if M.null m
then forM toadd add
else forM toadd $ \c -> do
mcache <- liftIO $ genInodeCache $ changeFile c
case mcache of
Nothing -> add c
Just cache ->
case M.lookup (inodeCacheToKey ct cache) m of
Nothing -> add c
Just k -> fastadd c k cache
fastadd :: Change -> Key -> InodeCache -> Assistant (Maybe Change)
fastadd change key cache = do
-- TODO do fast method
debug ["rename detected", show change, show key, show cache]
add change
--return $ Just $ finishedChange change key
removedKeysMap :: InodeComparisonType -> [Change] -> Annex (M.Map InodeCacheKey Key)
removedKeysMap ct l = do
mks <- forM (filter isRmChange l) $ \c ->
catKeyFile $ changeFile c
M.fromList . catMaybes <$> forM (catMaybes mks) mkpair
where
mkpair k = do
mcache <- recordedInodeCache k
case mcache of
Just cache -> return $ Just (inodeCacheToKey ct cache, k)
Nothing -> return Nothing
failedingest = do failedingest = do
liftAnnex showEndFail liftAnnex showEndFail
return Nothing return Nothing

View file

@ -11,22 +11,46 @@ import Common
import System.Posix.Types import System.Posix.Types
import Utility.QuickCheck import Utility.QuickCheck
data InodeCache = InodeCache FileID FileOffset EpochTime data InodeCachePrim = InodeCachePrim FileID FileOffset EpochTime
deriving (Show, Eq, Ord)
newtype InodeCache = InodeCache InodeCachePrim
deriving (Show) deriving (Show)
compareStrong :: InodeCache -> InodeCache -> Bool {- Inode caches can be compared in two different ways, either weakly
compareStrong (InodeCache inode1 size1 mtime1) (InodeCache inode2 size2 mtime2) = - or strongly. -}
inode1 == inode2 && size1 == size2 && mtime1 == mtime2 data InodeComparisonType = Weakly | Strongly
deriving (Eq, Ord)
{- Weak comparison of the inode caches, comparing the size and mtime, but {- Strong comparison, including inodes. -}
- not the actual inode. Useful when inodes have changed, perhaps compareStrong :: InodeCache -> InodeCache -> Bool
compareStrong (InodeCache x) (InodeCache y) = x == y
{- Weak comparison of the inode caches, comparing the size and mtime,
- but not the actual inode. Useful when inodes have changed, perhaps
- due to some filesystems being remounted. -} - due to some filesystems being remounted. -}
compareWeak :: InodeCache -> InodeCache -> Bool compareWeak :: InodeCache -> InodeCache -> Bool
compareWeak (InodeCache _ size1 mtime1) (InodeCache _ size2 mtime2) = compareWeak (InodeCache (InodeCachePrim _ size1 mtime1)) (InodeCache (InodeCachePrim _ size2 mtime2)) =
size1 == size2 && mtime1 == mtime2 size1 == size2 && mtime1 == mtime2
compareBy :: InodeComparisonType -> InodeCache -> InodeCache -> Bool
compareBy Strongly = compareStrong
compareBy Weakly = compareWeak
{- For use in a Map; it's determined at creation time whether this
- uses strong or weak comparison for Eq. -}
data InodeCacheKey = InodeCacheKey InodeComparisonType InodeCachePrim
deriving (Ord)
instance Eq InodeCacheKey where
(InodeCacheKey ctx x) == (InodeCacheKey cty y) =
compareBy (maximum [ctx,cty]) (InodeCache x ) (InodeCache y)
inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey
inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim
showInodeCache :: InodeCache -> String showInodeCache :: InodeCache -> String
showInodeCache (InodeCache inode size mtime) = unwords showInodeCache (InodeCache (InodeCachePrim inode size mtime)) = unwords
[ show inode [ show inode
, show size , show size
, show mtime , show mtime
@ -34,10 +58,12 @@ showInodeCache (InodeCache inode size mtime) = unwords
readInodeCache :: String -> Maybe InodeCache readInodeCache :: String -> Maybe InodeCache
readInodeCache s = case words s of readInodeCache s = case words s of
(inode:size:mtime:_) -> InodeCache (inode:size:mtime:_) ->
<$> readish inode let prim = InodeCachePrim
<*> readish size <$> readish inode
<*> readish mtime <*> readish size
<*> readish mtime
in InodeCache <$> prim
_ -> Nothing _ -> Nothing
genInodeCache :: FilePath -> IO (Maybe InodeCache) genInodeCache :: FilePath -> IO (Maybe InodeCache)
@ -45,17 +71,19 @@ genInodeCache f = catchDefaultIO Nothing $ toInodeCache <$> getFileStatus f
toInodeCache :: FileStatus -> Maybe InodeCache toInodeCache :: FileStatus -> Maybe InodeCache
toInodeCache s toInodeCache s
| isRegularFile s = Just $ InodeCache | isRegularFile s = Just $ InodeCache $ InodeCachePrim
(fileID s) (fileID s)
(fileSize s) (fileSize s)
(modificationTime s) (modificationTime s)
| otherwise = Nothing | otherwise = Nothing
instance Arbitrary InodeCache where instance Arbitrary InodeCache where
arbitrary = InodeCache arbitrary =
<$> arbitrary let prim = InodeCachePrim
<*> arbitrary <$> arbitrary
<*> arbitrary <*> arbitrary
<*> arbitrary
in InodeCache <$> prim
prop_read_show_inodecache :: InodeCache -> Bool prop_read_show_inodecache :: InodeCache -> Bool
prop_read_show_inodecache c = case readInodeCache (showInodeCache c) of prop_read_show_inodecache c = case readInodeCache (showInodeCache c) of

2
debian/changelog vendored
View file

@ -50,6 +50,8 @@ git-annex (4.20130228) UNRELEASED; urgency=low
status of that directory, rather than the whole annex. status of that directory, rather than the whole annex.
* Added remote.<name>.annex-gnupg-options setting. * Added remote.<name>.annex-gnupg-options setting.
Thanks, guilhem for the patch. Thanks, guilhem for the patch.
* assistant: Optimised handling of renamed files in direct mode,
avoiding re-checksumming.
-- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400 -- Joey Hess <joeyh@debian.org> Wed, 27 Feb 2013 23:20:40 -0400