From 66f3fb1ce240d410bbd6a6c3d599c494119234b5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Jan 2016 22:11:21 -0400 Subject: [PATCH] unused: deal with v6 unlocked file that is implicitly ingested by git diff etc --- Command/Unused.hs | 49 ++++++++++++++++++++++++++++++++++---------- Test.hs | 52 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 78 insertions(+), 23 deletions(-) diff --git a/Command/Unused.hs b/Command/Unused.hs index bb5d7c685a..84be0eefbe 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010-2015 Joey Hess + - Copyright 2010-2016 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -32,8 +32,11 @@ import Types.Key import Types.RefSpec import Git.Types import Git.Sha +import Git.FilePath import Logs.View (is_branchView) import Annex.BloomFilter +import qualified Database.Keys +import Annex.InodeSentinal cmd :: Command cmd = command "unused" SectionMaintenance "look for unused file content" @@ -156,23 +159,29 @@ dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\ - - Strategy: - - - Pass keys through 3 bloom filters in order, only creating each bloom + - Pass keys through these filters in order, only creating each bloom - filter on demand if the previous one didn't filter out all keys. - - - 1. All keys referenced by files in the work tree. + - 1. Bloom filter containing all keys referenced by files in the work tree. - This is the fastest one to build and will filter out most keys. - - 2. All keys in the diff from the work tree to the index. - - 3. All keys in the diffs between the index and branches matching the - - RefSpec. (This can take quite a while). + - 2. Bloom filter containing all keys in the diff from the work tree to + - the index. + - 3. Associated files filter. A v6 unlocked file may have had its content + - added to the annex (by eg, git diff running the smudge filter), + - but the new key is not yet staged in the index. But if so, it will + - have an associated file. + - 4. Bloom filter containing all keys in the diffs between the index and + - branches matching the RefSpec. (This can take quite a while to build). -} excludeReferenced :: RefSpec -> [Key] -> Annex [Key] -excludeReferenced refspec ks = - runfilter withKeysReferencedM ks - >>= runfilter withKeysReferencedDiffIndex - >>= runfilter (withKeysReferencedDiffGitRefs refspec) +excludeReferenced refspec ks = runbloomfilter withKeysReferencedM ks + >>= runbloomfilter withKeysReferencedDiffIndex + >>= runfilter associatedFilesFilter + >>= runbloomfilter (withKeysReferencedDiffGitRefs refspec) where runfilter _ [] = return [] -- optimisation - runfilter a l = bloomFilter l <$> genBloomFilter a + runfilter a l = a l + runbloomfilter a = runfilter $ \l -> bloomFilter l <$> genBloomFilter a {- Given an initial value, folds it with each key referenced by - files in the working tree. -} @@ -269,6 +278,24 @@ withKeysReferencedDiff a getdiff extractsha = do (parseLinkOrPointer <$> catObject sha) >>= maybe noop a +{- Filters out keys that have an associated file that's not modified. -} +associatedFilesFilter :: [Key] -> Annex [Key] +associatedFilesFilter = filterM go + where + go k = do + cs <- Database.Keys.getInodeCaches k + if null cs + then return True + else checkunmodified cs + =<< Database.Keys.getAssociatedFiles k + checkunmodified _ [] = return True + checkunmodified cs (f:fs) = do + relf <- fromRepo $ fromTopFilePath f + ifM (sameInodeCache relf cs) + ( return False + , checkunmodified cs fs + ) + data UnusedMaps = UnusedMaps { unusedMap :: UnusedMap , unusedBadMap :: UnusedMap diff --git a/Test.hs b/Test.hs index ba491f3d8d..9f7c7e328a 100644 --- a/Test.hs +++ b/Test.hs @@ -1,6 +1,6 @@ {- git-annex test suite - - - Copyright 2010-2015 Joey Hess + - Copyright 2010-2016 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -369,9 +369,7 @@ test_reinject = intmpclonerepoInDirect $ do git_annex "drop" ["--force", sha1annexedfile] @? "drop failed" annexed_notpresent sha1annexedfile writeFile tmp $ content sha1annexedfile - r <- annexeval $ Types.Backend.getKey backendSHA1 - Types.KeySource.KeySource { Types.KeySource.keyFilename = tmp, Types.KeySource.contentLocation = tmp, Types.KeySource.inodeCache = Nothing } - let key = Types.Key.key2file $ fromJust r + key <- Types.Key.key2file <$> getKey backendSHA1 tmp git_annex "reinject" [tmp, sha1annexedfile] @? "reinject failed" annexed_present sha1annexedfile git_annex "fromkey" [key, sha1annexedfiledup] @? "fromkey failed for dup" @@ -789,11 +787,10 @@ test_unused :: Assertion -- This test is broken in direct mode test_unused = intmpclonerepoInDirect $ do checkunused [] "in new clone" - -- keys have to be looked up before files are removed - annexedfilekey <- annexeval $ findkey annexedfile - sha1annexedfilekey <- annexeval $ findkey sha1annexedfile git_annex "get" [annexedfile] @? "get of file failed" git_annex "get" [sha1annexedfile] @? "get of file failed" + annexedfilekey <- getKey backendSHA256E annexedfile + sha1annexedfilekey <- getKey backendSHA1 sha1annexedfile checkunused [] "after get" boolSystem "git" [Param "rm", Param "-fq", File annexedfile] @? "git rm failed" checkunused [] "after rm" @@ -820,7 +817,7 @@ test_unused = intmpclonerepoInDirect $ do -- and pointed at annexed content, and think that content was unused writeFile "unusedfile" "unusedcontent" git_annex "add" ["unusedfile"] @? "add of unusedfile failed" - unusedfilekey <- annexeval $ findkey "unusedfile" + unusedfilekey <- getKey backendSHA256E "unusedfile" renameFile "unusedfile" "unusedunstagedfile" boolSystem "git" [Param "rm", Param "-qf", File "unusedfile"] @? "git rm failed" checkunused [] "with unstaged link" @@ -832,7 +829,7 @@ test_unused = intmpclonerepoInDirect $ do writeFile "unusedfile" "unusedcontent" git_annex "add" ["unusedfile"] @? "add of unusedfile failed" boolSystem "git" [Param "add", File "unusedfile"] @? "git add failed" - unusedfilekey' <- annexeval $ findkey "unusedfile" + unusedfilekey' <- getKey backendSHA256E "unusedfile" checkunused [] "with staged deleted link" boolSystem "git" [Param "rm", Param "-qf", File "unusedfile"] @? "git rm failed" checkunused [unusedfilekey'] "with staged link deleted" @@ -846,6 +843,27 @@ test_unused = intmpclonerepoInDirect $ do removeFile "unusedfile" checkunused [] "with staged deleted file" + -- When an unlocked file is modified, git diff will cause git-annex + -- to add its content to the repository. Make sure that's not + -- found as unused. + whenM (unlockedFiles <$> getTestMode) $ do + let f = "unlockedfile" + writeFile f "unlockedcontent1" + boolSystem "git" [Param "add", File "unlockedfile"] @? "git add failed" + checkunused [] "with unlocked file before modification" + writeFile f "unlockedcontent2" + checkunused [] "with unlocked file after modification" + not <$> boolSystem "git" [Param "diff", Param "--quiet", File f] @? "git diff did not show changes to unlocked file" + ver2key <- getKey backendSHA256E "unlockedfile" + -- still nothing unused because one version is in the index + -- and the other is in the work tree + checkunused [] "with unlocked file after git diff" + writeFile f "unlockedcontent3" + -- original version is still in index; version 2 is unused + -- now, and version 3 is in work tree + checkunused [ver2key] "with unlocked file after second modification" + not <$> boolSystem "git" [Param "diff", Param "--quiet", File f] @? "git diff did not show changes to unlocked file" + checkunused [ver2key] "with unlocked file after second git diff" where checkunused expectedkeys desc = do git_annex "unused" [] @? "unused failed" @@ -853,9 +871,6 @@ test_unused = intmpclonerepoInDirect $ do let unusedkeys = M.elems unusedmap assertEqual ("unused keys differ " ++ desc) (sort expectedkeys) (sort unusedkeys) - findkey f = do - r <- Annex.WorkTree.lookupFile f - return $ fromJust r test_describe :: Assertion test_describe = intmpclonerepo $ do @@ -1976,10 +1991,23 @@ backendSHA1 = backend_ "SHA1" backendSHA256 :: Types.Backend backendSHA256 = backend_ "SHA256" +backendSHA256E :: Types.Backend +backendSHA256E = backend_ "SHA256E" + backendWORM :: Types.Backend backendWORM = backend_ "WORM" backend_ :: String -> Types.Backend backend_ = Backend.lookupBackendName +getKey :: Types.Backend -> FilePath -> IO Types.Key +getKey b f = fromJust <$> annexeval go + where + go = Types.Backend.getKey b + Types.KeySource.KeySource + { Types.KeySource.keyFilename = f + , Types.KeySource.contentLocation = f + , Types.KeySource.inodeCache = Nothing + } + #endif