Fix direct mode getKeysPresent false positive & also sped up direct mode unused and unannex

unused: In direct mode, files that are deleted from the work tree are no longer incorrectly detected as unused.

Direct mode `git annex info` slows down a bit due to more stringent
checking, but not by a lot.
This commit is contained in:
Joey Hess 2014-03-07 12:43:56 -04:00
parent 580adbcaee
commit 1f99a6778f
7 changed files with 52 additions and 18 deletions

View file

@ -24,6 +24,7 @@ module Annex.Content (
removeAnnex,
fromAnnex,
moveBad,
KeyLocation(..),
getKeysPresent,
saveState,
downloadUrl,
@ -466,22 +467,33 @@ moveBad key = do
logStatus key InfoMissing
return dest
{- List of keys whose content exists in the annex. -}
getKeysPresent :: Annex [Key]
getKeysPresent = do
data KeyLocation = InAnnex | InRepository
{- List of keys whose content exists in the specified location.
- InAnnex only lists keys under .git/annex/objects,
- while InRepository, in direct mode, also finds keys located in the
- work tree.
-
- Note that InRepository has to check whether direct mode files
- have goodContent.
-}
getKeysPresent :: KeyLocation -> Annex [Key]
getKeysPresent keyloc = do
direct <- isDirect
dir <- fromRepo gitAnnexObjectDir
liftIO $ traverse direct (2 :: Int) dir
s <- getstate direct
liftIO $ traverse s direct (2 :: Int) dir
where
traverse direct depth dir = do
traverse s direct depth dir = do
contents <- catchDefaultIO [] (dirContents dir)
if depth == 0
then do
contents' <- filterM (present direct) contents
contents' <- filterM (present s direct) contents
let keys = mapMaybe (fileKey . takeFileName) contents'
continue keys []
else do
let deeper = traverse direct (depth - 1)
let deeper = traverse s direct (depth - 1)
continue [] (map deeper contents)
continue keys [] = return keys
continue keys (a:as) = do
@ -489,15 +501,31 @@ getKeysPresent = do
morekeys <- unsafeInterleaveIO a
continue (morekeys++keys) as
{- In indirect mode, look for the key. In direct mode,
- the inode cache file is only present when a key's content
- is present, so can be used as a surrogate if the content
- is not located in the annex directory. -}
present False d = doesFileExist $ contentfile d
present True d = doesFileExist (contentfile d ++ ".cache")
<||> present False d
present _ False d = presentInAnnex d
present s True d = presentDirect s d <||> presentInAnnex d
presentInAnnex = doesFileExist . contentfile
contentfile d = d </> takeFileName d
presentDirect s d = case keyloc of
InAnnex -> return False
InRepository -> case fileKey (takeFileName d) of
Nothing -> return False
Just k -> Annex.eval s $
anyM (goodContent k) =<< associatedFiles k
{- In order to run Annex monad actions within unsafeInterleaveIO,
- the current state is taken and reused. No changes made to this
- state will be preserved.
-
- As an optimsation, call inodesChanged to prime the state with
- a cached value that will be used in the call to goodContent.
-}
getstate direct = do
when direct $
void $ inodesChanged
Annex.getState id
{- Things to do to record changes to content when shutting down.
-
- It's acceptable to avoid committing changes to the branch,

View file

@ -198,7 +198,7 @@ enableDirectMode = unlessM isDirect $ do
-}
fixBadBare :: Annex ()
fixBadBare = whenM checkBadBare $ do
ks <- getKeysPresent
ks <- getKeysPresent InAnnex
liftIO $ debugM "Init" $ unwords
[ "Detected bad bare repository with"
, show (length ks)

View file

@ -281,7 +281,7 @@ cachedPresentData = do
case presentData s of
Just v -> return v
Nothing -> do
v <- foldKeys <$> lift getKeysPresent
v <- foldKeys <$> lift (getKeysPresent InRepository)
put s { presentData = Just v }
return v

View file

@ -53,7 +53,7 @@ finish :: Annex ()
finish = do
annexdir <- fromRepo gitAnnexDir
annexobjectdir <- fromRepo gitAnnexObjectDir
leftovers <- removeUnannexed =<< getKeysPresent
leftovers <- removeUnannexed =<< getKeysPresent InAnnex
if null leftovers
then liftIO $ removeDirectoryRecursive annexdir
else error $ unlines

View file

@ -71,7 +71,9 @@ checkUnused = chain 0
return []
findunused False = do
showAction "checking for unused data"
excludeReferenced =<< getKeysPresent
-- InAnnex, not InRepository because if a direct mode
-- file exists, it is obviously not unused.
excludeReferenced =<< getKeysPresent InAnnex
chain _ [] = next $ return True
chain v (a:as) = do
v' <- a v

2
debian/changelog vendored
View file

@ -3,6 +3,8 @@ git-annex (5.20140307) UNRELEASED; urgency=medium
* Fix zombie leak and general inneficiency when copying files to a
local git repo.
* webapp: Added a "Sync now" item to each repository's menu.
* unused: In direct mode, files that are deleted from the work tree
are no longer incorrectly detected as unused.
-- Joey Hess <joeyh@debian.org> Thu, 06 Mar 2014 16:17:01 -0400

View file

@ -46,3 +46,5 @@ copy SHA256E-s293288--30f1367fc326f7b053012818863151206f9e3ddeab3c3fc5b5c1c573d1
copy SHA256E-s3672986--be960f6dc247df2496f634f7d788bd4a180fe556230e2dafc23ebc8fc1f10af3.JPG (checking synology...) ok
$
"""]]
> [[fixed|done]] per my comment --[[Joey]]