Bugfix: Direct mode no longer repeatedly checksums duplicated files.

Fixed by storing a list of cached inodes for a key, instead of just one.

Backwards compatibility note: An old git-annex version will fail to parse
an inode cache file that has been written by a new version, and has
multiple items. It will succeed if there is just one. So old git-annexes will have
even worse behavior when there are duplicated files, if that is possible.
I don't think it will be a problem. (Famous last words.)

Also, note that it doesn't expire old and unused inode caches for a key.
It would be possible to add this if needed; just look through the
associated files for a key and if there are more cached inodes, throw out
any not corresponding to associated files. Unless a file is being copied
repeatedly and the old copy deleted, this lack of expiry should not be a
problem.
This commit is contained in:
Joey Hess 2013-04-06 16:01:39 -04:00
parent 54d7637b3a
commit 602baae12e
6 changed files with 56 additions and 26 deletions

View file

@ -12,10 +12,12 @@ module Annex.Content.Direct (
goodContent,
recordedInodeCache,
updateInodeCache,
addInodeCache,
writeInodeCache,
compareInodeCaches,
compareInodeCachesWith,
sameInodeCache,
elemInodeCaches,
sameFileStatus,
removeInodeCache,
toInodeCache,
@ -101,21 +103,36 @@ normaliseAssociatedFile file = do
{- Looks up what recordedInodeCache has for the key, and passes that to
 - sameInodeCache to check it against the file. -}
goodContent :: Key -> FilePath -> Annex Bool
goodContent key file = recordedInodeCache key >>= sameInodeCache file
{- Gets the recorded inode cache for a key. -}
recordedInodeCache :: Key -> Annex (Maybe InodeCache)
{- Gets the recorded inode cache for a key.
-
- A key can be associated with multiple files, so may return more than
- one. -}
recordedInodeCache :: Key -> Annex [InodeCache]
recordedInodeCache key = withInodeCacheFile key $ \f ->
liftIO $ catchDefaultIO Nothing $ readInodeCache <$> readFile f
liftIO $ catchDefaultIO [] $
mapMaybe readInodeCache . lines <$> readFile f
{- Stores a cache of attributes for a file that is associated with a key. -}
{- Caches an inode for a file.
-
- Anything else already cached is preserved.
-}
updateInodeCache :: Key -> FilePath -> Annex ()
updateInodeCache key file = maybe noop (writeInodeCache key)
updateInodeCache key file = maybe noop (addInodeCache key)
=<< liftIO (genInodeCache file)
{- Writes a cache for a key. -}
writeInodeCache :: Key -> InodeCache -> Annex ()
writeInodeCache key cache = withInodeCacheFile key $ \f -> do
{- Records one more inode cache for a key.
 -
 - Nothing is written when elemInodeCaches finds the cache among those
 - already recorded; otherwise the new cache is written in front of the
 - existing ones. -}
addInodeCache :: Key -> InodeCache -> Annex ()
addInodeCache key cache = do
    recorded <- recordedInodeCache key
    ifM (elemInodeCaches cache recorded)
        ( noop
        , writeInodeCache key (cache : recorded)
        )
{- Writes inode cache for a key. -}
writeInodeCache :: Key -> [InodeCache] -> Annex ()
writeInodeCache key caches = withInodeCacheFile key $ \f -> do
createContentDir f
liftIO $ writeFile f $ showInodeCache cache
liftIO $ writeFile f $
unlines $ map showInodeCache caches
{- Removes an inode cache. -}
removeInodeCache :: Key -> Annex ()
@ -127,12 +144,12 @@ withInodeCacheFile :: Key -> (FilePath -> Annex a) -> Annex a
withInodeCacheFile key a = a =<< calcRepo (gitAnnexInodeCache key)
{- Checks if a InodeCache matches the current version of a file. -}
sameInodeCache :: FilePath -> Maybe InodeCache -> Annex Bool
sameInodeCache _ Nothing = return False
sameInodeCache file (Just old) = go =<< liftIO (genInodeCache file)
sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool
sameInodeCache _ [] = return False
sameInodeCache file old = go =<< liftIO (genInodeCache file)
where
go Nothing = return False
go (Just curr) = compareInodeCaches curr old
go (Just curr) = elemInodeCaches curr old
{- Checks if a FileStatus matches the recorded InodeCache of a file. -}
sameFileStatus :: Key -> FileStatus -> Annex Bool
@ -140,8 +157,8 @@ sameFileStatus key status = do
old <- recordedInodeCache key
let curr = toInodeCache status
case (old, curr) of
(Just o, Just c) -> compareInodeCaches o c
(Nothing, Nothing) -> return True
(_, Just c) -> elemInodeCaches c old
([], Nothing) -> return True
_ -> return False
{- If the inodes have changed, only the size and mtime are compared. -}
@ -153,6 +170,13 @@ compareInodeCaches x y
, return False
)
{- Checks whether an inode cache matches any member of a list of inode
 - caches, as judged by compareInodeCaches.
 -
 - Candidates are tried in list order and the check stops at the first
 - match; an empty list never matches. -}
elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool
elemInodeCaches c caches = foldr check (return False) caches
  where
    -- The remaining comparisons are only run when this one fails.
    check candidate rest = ifM (compareInodeCaches c candidate)
        ( return True
        , rest
        )
{- Selects the inode comparison type: Weakly when inodesChanged yields
 - True, Strongly otherwise. -}
compareInodeCachesWith :: Annex InodeComparisonType
compareInodeCachesWith = do
    changed <- inodesChanged
    return $ if changed then Weakly else Strongly

View file

@ -52,8 +52,8 @@ stageDirect = do
- it really was. -}
oldcache <- recordedInodeCache key
case oldcache of
Nothing -> modifiedannexed file key cache
Just c -> unlessM (compareInodeCaches c cache) $
[] -> modifiedannexed file key cache
_ -> unlessM (elemInodeCaches cache oldcache) $
modifiedannexed file key cache
(Just key, Nothing, _) -> deletedannexed file key
(Nothing, Nothing, _) -> deletegit file
@ -87,11 +87,11 @@ addDirect file cache = do
got Nothing = do
showEndFail
return False
got (Just (key, _)) = ifM (sameInodeCache file $ Just cache)
got (Just (key, _)) = ifM (sameInodeCache file [cache])
( do
l <- inRepo $ gitAnnexLink file key
stageSymlink file =<< hashSymlink l
writeInodeCache key cache
addInodeCache key cache
void $ addAssociatedFile key file
logStatus key InfoPresent
showEndOk

View file

@ -297,13 +297,10 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do
removedKeysMap ct l = do
mks <- forM (filter isRmChange l) $ \c ->
catKeyFile $ changeFile c
M.fromList . catMaybes <$> forM (catMaybes mks) mkpair
M.fromList . concat <$> mapM mkpairs (catMaybes mks)
where
mkpair k = do
mcache <- recordedInodeCache k
case mcache of
Just cache -> return $ Just (inodeCacheToKey ct cache, k)
Nothing -> return Nothing
mkpairs k = map (\c -> (inodeCacheToKey ct c, k)) <$>
recordedInodeCache k
failedingest = do
liftAnnex showEndFail

View file

@ -132,7 +132,7 @@ ingest (Just source) = do
goindirect Nothing _ = failure
godirect (Just (key, _)) (Just cache) = do
writeInodeCache key cache
addInodeCache key cache
finishIngestDirect key source
return $ Just key
godirect _ _ = failure

6
debian/changelog vendored
View file

@ -1,3 +1,9 @@
git-annex (4.20130406) UNRELEASED; urgency=low
* Bugfix: Direct mode no longer repeatedly checksums duplicated files.
-- Joey Hess <joeyh@debian.org> Sat, 06 Apr 2013 15:24:15 -0400
git-annex (4.20130405) unstable; urgency=low
* Group subcommands into sections in usage. Closes: #703797

View file

@ -20,3 +20,6 @@ Secondly, the sync can take quite a while if you have lots of duplicates or a lo
##What version of git-annex are you using? On what operating system?
git-annex version: 4.20130227 on Archlinux
> [[done]]; fixed inode caching code to support multiple files for the
> same content. --[[Joey]]