Bugfix: Direct mode no longer repeatedly checksums duplicated files.
Fixed by storing a list of cached inodes for a key, instead of just one. Backwards compatibility note: An old git-annex version will fail to parse an inode cache file that has been written by a new version and contains multiple items. It will succeed if the file contains just one. So old git-annexes will have even worse behavior when there are duplicated files, if that is possible. I don't think it will be a problem. (Famous last words.) Also, note that it doesn't expire old and unused inode caches for a key. It would be possible to add this if needed; just look through the associated files for a key and, if there are more cached inodes than associated files, throw out any not corresponding to associated files. Unless a file is being copied repeatedly and the old copy deleted, this lack of expiry should not be a problem.
This commit is contained in:
parent
54d7637b3a
commit
602baae12e
6 changed files with 56 additions and 26 deletions
|
@ -12,10 +12,12 @@ module Annex.Content.Direct (
|
|||
goodContent,
|
||||
recordedInodeCache,
|
||||
updateInodeCache,
|
||||
addInodeCache,
|
||||
writeInodeCache,
|
||||
compareInodeCaches,
|
||||
compareInodeCachesWith,
|
||||
sameInodeCache,
|
||||
elemInodeCaches,
|
||||
sameFileStatus,
|
||||
removeInodeCache,
|
||||
toInodeCache,
|
||||
|
@ -101,21 +103,36 @@ normaliseAssociatedFile file = do
|
|||
goodContent :: Key -> FilePath -> Annex Bool
|
||||
goodContent key file = sameInodeCache file =<< recordedInodeCache key
|
||||
|
||||
{- Gets the recorded inode cache for a key. -}
|
||||
recordedInodeCache :: Key -> Annex (Maybe InodeCache)
|
||||
{- Gets the recorded inode cache for a key.
|
||||
-
|
||||
- A key can be associated with multiple files, so may return more than
|
||||
- one. -}
|
||||
recordedInodeCache :: Key -> Annex [InodeCache]
|
||||
recordedInodeCache key = withInodeCacheFile key $ \f ->
|
||||
liftIO $ catchDefaultIO Nothing $ readInodeCache <$> readFile f
|
||||
liftIO $ catchDefaultIO [] $
|
||||
mapMaybe readInodeCache . lines <$> readFile f
|
||||
|
||||
{- Stores a cache of attributes for a file that is associated with a key. -}
|
||||
{- Caches an inode for a file.
|
||||
-
|
||||
- Anything else already cached is preserved.
|
||||
-}
|
||||
updateInodeCache :: Key -> FilePath -> Annex ()
|
||||
updateInodeCache key file = maybe noop (writeInodeCache key)
|
||||
updateInodeCache key file = maybe noop (addInodeCache key)
|
||||
=<< liftIO (genInodeCache file)
|
||||
|
||||
{- Writes a cache for a key. -}
|
||||
writeInodeCache :: Key -> InodeCache -> Annex ()
|
||||
writeInodeCache key cache = withInodeCacheFile key $ \f -> do
|
||||
{- Adds another inode to the cache for a key. -}
|
||||
addInodeCache :: Key -> InodeCache -> Annex ()
|
||||
addInodeCache key cache = do
|
||||
oldcaches <- recordedInodeCache key
|
||||
unlessM (elemInodeCaches cache oldcaches) $
|
||||
writeInodeCache key (cache:oldcaches)
|
||||
|
||||
{- Writes inode cache for a key. -}
|
||||
writeInodeCache :: Key -> [InodeCache] -> Annex ()
|
||||
writeInodeCache key caches = withInodeCacheFile key $ \f -> do
|
||||
createContentDir f
|
||||
liftIO $ writeFile f $ showInodeCache cache
|
||||
liftIO $ writeFile f $
|
||||
unlines $ map showInodeCache caches
|
||||
|
||||
{- Removes an inode cache. -}
|
||||
removeInodeCache :: Key -> Annex ()
|
||||
|
@ -127,12 +144,12 @@ withInodeCacheFile :: Key -> (FilePath -> Annex a) -> Annex a
|
|||
withInodeCacheFile key a = a =<< calcRepo (gitAnnexInodeCache key)
|
||||
|
||||
{- Checks if an InodeCache matches the current version of a file. -}
|
||||
sameInodeCache :: FilePath -> Maybe InodeCache -> Annex Bool
|
||||
sameInodeCache _ Nothing = return False
|
||||
sameInodeCache file (Just old) = go =<< liftIO (genInodeCache file)
|
||||
sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool
|
||||
sameInodeCache _ [] = return False
|
||||
sameInodeCache file old = go =<< liftIO (genInodeCache file)
|
||||
where
|
||||
go Nothing = return False
|
||||
go (Just curr) = compareInodeCaches curr old
|
||||
go (Just curr) = elemInodeCaches curr old
|
||||
|
||||
{- Checks if a FileStatus matches the recorded InodeCache of a file. -}
|
||||
sameFileStatus :: Key -> FileStatus -> Annex Bool
|
||||
|
@ -140,8 +157,8 @@ sameFileStatus key status = do
|
|||
old <- recordedInodeCache key
|
||||
let curr = toInodeCache status
|
||||
case (old, curr) of
|
||||
(Just o, Just c) -> compareInodeCaches o c
|
||||
(Nothing, Nothing) -> return True
|
||||
(_, Just c) -> elemInodeCaches c old
|
||||
([], Nothing) -> return True
|
||||
_ -> return False
|
||||
|
||||
{- If the inodes have changed, only the size and mtime are compared. -}
|
||||
|
@ -153,6 +170,13 @@ compareInodeCaches x y
|
|||
, return False
|
||||
)
|
||||
|
||||
elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool
|
||||
elemInodeCaches _ [] = return False
|
||||
elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l)
|
||||
( return True
|
||||
, elemInodeCaches c ls
|
||||
)
|
||||
|
||||
compareInodeCachesWith :: Annex InodeComparisonType
|
||||
compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
|
||||
|
||||
|
|
|
@ -52,8 +52,8 @@ stageDirect = do
|
|||
- it really was. -}
|
||||
oldcache <- recordedInodeCache key
|
||||
case oldcache of
|
||||
Nothing -> modifiedannexed file key cache
|
||||
Just c -> unlessM (compareInodeCaches c cache) $
|
||||
[] -> modifiedannexed file key cache
|
||||
_ -> unlessM (elemInodeCaches cache oldcache) $
|
||||
modifiedannexed file key cache
|
||||
(Just key, Nothing, _) -> deletedannexed file key
|
||||
(Nothing, Nothing, _) -> deletegit file
|
||||
|
@ -87,11 +87,11 @@ addDirect file cache = do
|
|||
got Nothing = do
|
||||
showEndFail
|
||||
return False
|
||||
got (Just (key, _)) = ifM (sameInodeCache file $ Just cache)
|
||||
got (Just (key, _)) = ifM (sameInodeCache file [cache])
|
||||
( do
|
||||
l <- inRepo $ gitAnnexLink file key
|
||||
stageSymlink file =<< hashSymlink l
|
||||
writeInodeCache key cache
|
||||
addInodeCache key cache
|
||||
void $ addAssociatedFile key file
|
||||
logStatus key InfoPresent
|
||||
showEndOk
|
||||
|
|
|
@ -297,13 +297,10 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do
|
|||
removedKeysMap ct l = do
|
||||
mks <- forM (filter isRmChange l) $ \c ->
|
||||
catKeyFile $ changeFile c
|
||||
M.fromList . catMaybes <$> forM (catMaybes mks) mkpair
|
||||
M.fromList . concat <$> mapM mkpairs (catMaybes mks)
|
||||
where
|
||||
mkpair k = do
|
||||
mcache <- recordedInodeCache k
|
||||
case mcache of
|
||||
Just cache -> return $ Just (inodeCacheToKey ct cache, k)
|
||||
Nothing -> return Nothing
|
||||
mkpairs k = map (\c -> (inodeCacheToKey ct c, k)) <$>
|
||||
recordedInodeCache k
|
||||
|
||||
failedingest = do
|
||||
liftAnnex showEndFail
|
||||
|
|
|
@ -132,7 +132,7 @@ ingest (Just source) = do
|
|||
goindirect Nothing _ = failure
|
||||
|
||||
godirect (Just (key, _)) (Just cache) = do
|
||||
writeInodeCache key cache
|
||||
addInodeCache key cache
|
||||
finishIngestDirect key source
|
||||
return $ Just key
|
||||
godirect _ _ = failure
|
||||
|
|
6
debian/changelog
vendored
6
debian/changelog
vendored
|
@ -1,3 +1,9 @@
|
|||
git-annex (4.20130406) UNRELEASED; urgency=low
|
||||
|
||||
* Bugfix: Direct mode no longer repeatedly checksums duplicated files.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Sat, 06 Apr 2013 15:24:15 -0400
|
||||
|
||||
git-annex (4.20130405) unstable; urgency=low
|
||||
|
||||
* Group subcommands into sections in usage. Closes: #703797
|
||||
|
|
|
@ -20,3 +20,6 @@ Secondly, the sync can take quite a while if you have lots of duplicates or a lo
|
|||
##What version of git-annex are you using? On what operating system?
|
||||
|
||||
git-annex version: 4.20130227 on Archlinux
|
||||
|
||||
> [[done]]; fixed inode caching code to support multiple files for the
|
||||
> same content. --[[Joey]]
|
||||
|
|
Loading…
Reference in a new issue