Bugfix: Direct mode no longer repeatedly checksums duplicated files.
Fixed by storing a list of cached inodes for a key, instead of just one. Backwards compatibility note: An old git-annex version will fail to parse an inode cache file that has been written by a new version and contains multiple items. It will succeed if the file contains just one. So old git-annexes will have even worse behavior when there are duplicated files, if that is possible. I don't think it will be a problem. (Famous last words.) Also, note that it doesn't expire old and unused inode caches for a key. It would be possible to add this if needed; just look through the associated files for a key and, if there are more cached inodes than associated files, throw out any not corresponding to associated files. Unless a file is being copied repeatedly and the old copy deleted, this lack of expiry should not be a problem.
This commit is contained in:
parent
54d7637b3a
commit
602baae12e
6 changed files with 56 additions and 26 deletions
|
@ -12,10 +12,12 @@ module Annex.Content.Direct (
|
||||||
goodContent,
|
goodContent,
|
||||||
recordedInodeCache,
|
recordedInodeCache,
|
||||||
updateInodeCache,
|
updateInodeCache,
|
||||||
|
addInodeCache,
|
||||||
writeInodeCache,
|
writeInodeCache,
|
||||||
compareInodeCaches,
|
compareInodeCaches,
|
||||||
compareInodeCachesWith,
|
compareInodeCachesWith,
|
||||||
sameInodeCache,
|
sameInodeCache,
|
||||||
|
elemInodeCaches,
|
||||||
sameFileStatus,
|
sameFileStatus,
|
||||||
removeInodeCache,
|
removeInodeCache,
|
||||||
toInodeCache,
|
toInodeCache,
|
||||||
|
@ -101,21 +103,36 @@ normaliseAssociatedFile file = do
|
||||||
goodContent :: Key -> FilePath -> Annex Bool
|
goodContent :: Key -> FilePath -> Annex Bool
|
||||||
goodContent key file = sameInodeCache file =<< recordedInodeCache key
|
goodContent key file = sameInodeCache file =<< recordedInodeCache key
|
||||||
|
|
||||||
{- Gets the recorded inode cache for a key. -}
|
{- Gets the recorded inode cache for a key.
|
||||||
recordedInodeCache :: Key -> Annex (Maybe InodeCache)
|
-
|
||||||
|
- A key can be associated with multiple files, so may return more than
|
||||||
|
- one. -}
|
||||||
|
recordedInodeCache :: Key -> Annex [InodeCache]
|
||||||
recordedInodeCache key = withInodeCacheFile key $ \f ->
|
recordedInodeCache key = withInodeCacheFile key $ \f ->
|
||||||
liftIO $ catchDefaultIO Nothing $ readInodeCache <$> readFile f
|
liftIO $ catchDefaultIO [] $
|
||||||
|
mapMaybe readInodeCache . lines <$> readFile f
|
||||||
|
|
||||||
{- Stores a cache of attributes for a file that is associated with a key. -}
|
{- Caches an inode for a file.
|
||||||
|
-
|
||||||
|
- Anything else already cached is preserved.
|
||||||
|
-}
|
||||||
updateInodeCache :: Key -> FilePath -> Annex ()
|
updateInodeCache :: Key -> FilePath -> Annex ()
|
||||||
updateInodeCache key file = maybe noop (writeInodeCache key)
|
updateInodeCache key file = maybe noop (addInodeCache key)
|
||||||
=<< liftIO (genInodeCache file)
|
=<< liftIO (genInodeCache file)
|
||||||
|
|
||||||
{- Writes a cache for a key. -}
|
{- Adds another inode to the cache for a key. -}
|
||||||
writeInodeCache :: Key -> InodeCache -> Annex ()
|
addInodeCache :: Key -> InodeCache -> Annex ()
|
||||||
writeInodeCache key cache = withInodeCacheFile key $ \f -> do
|
addInodeCache key cache = do
|
||||||
|
oldcaches <- recordedInodeCache key
|
||||||
|
unlessM (elemInodeCaches cache oldcaches) $
|
||||||
|
writeInodeCache key (cache:oldcaches)
|
||||||
|
|
||||||
|
{- Writes inode cache for a key. -}
|
||||||
|
writeInodeCache :: Key -> [InodeCache] -> Annex ()
|
||||||
|
writeInodeCache key caches = withInodeCacheFile key $ \f -> do
|
||||||
createContentDir f
|
createContentDir f
|
||||||
liftIO $ writeFile f $ showInodeCache cache
|
liftIO $ writeFile f $
|
||||||
|
unlines $ map showInodeCache caches
|
||||||
|
|
||||||
{- Removes an inode cache. -}
|
{- Removes an inode cache. -}
|
||||||
removeInodeCache :: Key -> Annex ()
|
removeInodeCache :: Key -> Annex ()
|
||||||
|
@ -127,12 +144,12 @@ withInodeCacheFile :: Key -> (FilePath -> Annex a) -> Annex a
|
||||||
withInodeCacheFile key a = a =<< calcRepo (gitAnnexInodeCache key)
|
withInodeCacheFile key a = a =<< calcRepo (gitAnnexInodeCache key)
|
||||||
|
|
||||||
{- Checks if a InodeCache matches the current version of a file. -}
|
{- Checks if a InodeCache matches the current version of a file. -}
|
||||||
sameInodeCache :: FilePath -> Maybe InodeCache -> Annex Bool
|
sameInodeCache :: FilePath -> [InodeCache] -> Annex Bool
|
||||||
sameInodeCache _ Nothing = return False
|
sameInodeCache _ [] = return False
|
||||||
sameInodeCache file (Just old) = go =<< liftIO (genInodeCache file)
|
sameInodeCache file old = go =<< liftIO (genInodeCache file)
|
||||||
where
|
where
|
||||||
go Nothing = return False
|
go Nothing = return False
|
||||||
go (Just curr) = compareInodeCaches curr old
|
go (Just curr) = elemInodeCaches curr old
|
||||||
|
|
||||||
{- Checks if a FileStatus matches the recorded InodeCache of a file. -}
|
{- Checks if a FileStatus matches the recorded InodeCache of a file. -}
|
||||||
sameFileStatus :: Key -> FileStatus -> Annex Bool
|
sameFileStatus :: Key -> FileStatus -> Annex Bool
|
||||||
|
@ -140,8 +157,8 @@ sameFileStatus key status = do
|
||||||
old <- recordedInodeCache key
|
old <- recordedInodeCache key
|
||||||
let curr = toInodeCache status
|
let curr = toInodeCache status
|
||||||
case (old, curr) of
|
case (old, curr) of
|
||||||
(Just o, Just c) -> compareInodeCaches o c
|
(_, Just c) -> elemInodeCaches c old
|
||||||
(Nothing, Nothing) -> return True
|
([], Nothing) -> return True
|
||||||
_ -> return False
|
_ -> return False
|
||||||
|
|
||||||
{- If the inodes have changed, only the size and mtime are compared. -}
|
{- If the inodes have changed, only the size and mtime are compared. -}
|
||||||
|
@ -153,6 +170,13 @@ compareInodeCaches x y
|
||||||
, return False
|
, return False
|
||||||
)
|
)
|
||||||
|
|
||||||
|
elemInodeCaches :: InodeCache -> [InodeCache] -> Annex Bool
|
||||||
|
elemInodeCaches _ [] = return False
|
||||||
|
elemInodeCaches c (l:ls) = ifM (compareInodeCaches c l)
|
||||||
|
( return True
|
||||||
|
, elemInodeCaches c ls
|
||||||
|
)
|
||||||
|
|
||||||
compareInodeCachesWith :: Annex InodeComparisonType
|
compareInodeCachesWith :: Annex InodeComparisonType
|
||||||
compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
|
compareInodeCachesWith = ifM inodesChanged ( return Weakly, return Strongly )
|
||||||
|
|
||||||
|
|
|
@ -52,8 +52,8 @@ stageDirect = do
|
||||||
- it really was. -}
|
- it really was. -}
|
||||||
oldcache <- recordedInodeCache key
|
oldcache <- recordedInodeCache key
|
||||||
case oldcache of
|
case oldcache of
|
||||||
Nothing -> modifiedannexed file key cache
|
[] -> modifiedannexed file key cache
|
||||||
Just c -> unlessM (compareInodeCaches c cache) $
|
_ -> unlessM (elemInodeCaches cache oldcache) $
|
||||||
modifiedannexed file key cache
|
modifiedannexed file key cache
|
||||||
(Just key, Nothing, _) -> deletedannexed file key
|
(Just key, Nothing, _) -> deletedannexed file key
|
||||||
(Nothing, Nothing, _) -> deletegit file
|
(Nothing, Nothing, _) -> deletegit file
|
||||||
|
@ -87,11 +87,11 @@ addDirect file cache = do
|
||||||
got Nothing = do
|
got Nothing = do
|
||||||
showEndFail
|
showEndFail
|
||||||
return False
|
return False
|
||||||
got (Just (key, _)) = ifM (sameInodeCache file $ Just cache)
|
got (Just (key, _)) = ifM (sameInodeCache file [cache])
|
||||||
( do
|
( do
|
||||||
l <- inRepo $ gitAnnexLink file key
|
l <- inRepo $ gitAnnexLink file key
|
||||||
stageSymlink file =<< hashSymlink l
|
stageSymlink file =<< hashSymlink l
|
||||||
writeInodeCache key cache
|
addInodeCache key cache
|
||||||
void $ addAssociatedFile key file
|
void $ addAssociatedFile key file
|
||||||
logStatus key InfoPresent
|
logStatus key InfoPresent
|
||||||
showEndOk
|
showEndOk
|
||||||
|
|
|
@ -297,13 +297,10 @@ handleAdds delayadd cs = returnWhen (null incomplete) $ do
|
||||||
removedKeysMap ct l = do
|
removedKeysMap ct l = do
|
||||||
mks <- forM (filter isRmChange l) $ \c ->
|
mks <- forM (filter isRmChange l) $ \c ->
|
||||||
catKeyFile $ changeFile c
|
catKeyFile $ changeFile c
|
||||||
M.fromList . catMaybes <$> forM (catMaybes mks) mkpair
|
M.fromList . concat <$> mapM mkpairs (catMaybes mks)
|
||||||
where
|
where
|
||||||
mkpair k = do
|
mkpairs k = map (\c -> (inodeCacheToKey ct c, k)) <$>
|
||||||
mcache <- recordedInodeCache k
|
recordedInodeCache k
|
||||||
case mcache of
|
|
||||||
Just cache -> return $ Just (inodeCacheToKey ct cache, k)
|
|
||||||
Nothing -> return Nothing
|
|
||||||
|
|
||||||
failedingest = do
|
failedingest = do
|
||||||
liftAnnex showEndFail
|
liftAnnex showEndFail
|
||||||
|
|
|
@ -132,7 +132,7 @@ ingest (Just source) = do
|
||||||
goindirect Nothing _ = failure
|
goindirect Nothing _ = failure
|
||||||
|
|
||||||
godirect (Just (key, _)) (Just cache) = do
|
godirect (Just (key, _)) (Just cache) = do
|
||||||
writeInodeCache key cache
|
addInodeCache key cache
|
||||||
finishIngestDirect key source
|
finishIngestDirect key source
|
||||||
return $ Just key
|
return $ Just key
|
||||||
godirect _ _ = failure
|
godirect _ _ = failure
|
||||||
|
|
6
debian/changelog
vendored
6
debian/changelog
vendored
|
@ -1,3 +1,9 @@
|
||||||
|
git-annex (4.20130406) UNRELEASED; urgency=low
|
||||||
|
|
||||||
|
* Bugfix: Direct mode no longer repeatedly checksums duplicated files.
|
||||||
|
|
||||||
|
-- Joey Hess <joeyh@debian.org> Sat, 06 Apr 2013 15:24:15 -0400
|
||||||
|
|
||||||
git-annex (4.20130405) unstable; urgency=low
|
git-annex (4.20130405) unstable; urgency=low
|
||||||
|
|
||||||
* Group subcommands into sections in usage. Closes: #703797
|
* Group subcommands into sections in usage. Closes: #703797
|
||||||
|
|
|
@ -20,3 +20,6 @@ Secondly, the sync can take quite a while if you have lots of duplicates or a lo
|
||||||
##What version of git-annex are you using? On what operating system?
|
##What version of git-annex are you using? On what operating system?
|
||||||
|
|
||||||
git-annex version: 4.20130227 on Archlinux
|
git-annex version: 4.20130227 on Archlinux
|
||||||
|
|
||||||
|
> [[done]]; fixed inode caching code to support multiple files for the
|
||||||
|
> same content. --[[Joey]]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue