unused: deal with v6 unlocked file that is implicitly ingested by git diff etc
This commit is contained in:
parent
7482853ddd
commit
66f3fb1ce2
2 changed files with 78 additions and 23 deletions
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex command
|
{- git-annex command
|
||||||
-
|
-
|
||||||
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
|
- Copyright 2010-2016 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -32,8 +32,11 @@ import Types.Key
|
||||||
import Types.RefSpec
|
import Types.RefSpec
|
||||||
import Git.Types
|
import Git.Types
|
||||||
import Git.Sha
|
import Git.Sha
|
||||||
|
import Git.FilePath
|
||||||
import Logs.View (is_branchView)
|
import Logs.View (is_branchView)
|
||||||
import Annex.BloomFilter
|
import Annex.BloomFilter
|
||||||
|
import qualified Database.Keys
|
||||||
|
import Annex.InodeSentinal
|
||||||
|
|
||||||
cmd :: Command
|
cmd :: Command
|
||||||
cmd = command "unused" SectionMaintenance "look for unused file content"
|
cmd = command "unused" SectionMaintenance "look for unused file content"
|
||||||
|
@ -156,23 +159,29 @@ dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\
|
||||||
-
|
-
|
||||||
- Strategy:
|
- Strategy:
|
||||||
-
|
-
|
||||||
- Pass keys through 3 bloom filters in order, only creating each bloom
|
- Pass keys through these filters in order, only creating each bloom
|
||||||
- filter on demand if the previous one didn't filter out all keys.
|
- filter on demand if the previous one didn't filter out all keys.
|
||||||
-
|
-
|
||||||
- 1. All keys referenced by files in the work tree.
|
- 1. Bloom filter containing all keys referenced by files in the work tree.
|
||||||
- This is the fastest one to build and will filter out most keys.
|
- This is the fastest one to build and will filter out most keys.
|
||||||
- 2. All keys in the diff from the work tree to the index.
|
- 2. Bloom filter containing all keys in the diff from the work tree to
|
||||||
- 3. All keys in the diffs between the index and branches matching the
|
- the index.
|
||||||
- RefSpec. (This can take quite a while).
|
- 3. Associated files filter. A v6 unlocked file may have had its content
|
||||||
|
- added to the annex (by eg, git diff running the smudge filter),
|
||||||
|
- but the new key is not yet staged in the index. But if so, it will
|
||||||
|
- have an associated file.
|
||||||
|
- 4. Bloom filter containing all keys in the diffs between the index and
|
||||||
|
- branches matching the RefSpec. (This can take quite a while to build).
|
||||||
-}
|
-}
|
||||||
excludeReferenced :: RefSpec -> [Key] -> Annex [Key]
|
excludeReferenced :: RefSpec -> [Key] -> Annex [Key]
|
||||||
excludeReferenced refspec ks =
|
excludeReferenced refspec ks = runbloomfilter withKeysReferencedM ks
|
||||||
runfilter withKeysReferencedM ks
|
>>= runbloomfilter withKeysReferencedDiffIndex
|
||||||
>>= runfilter withKeysReferencedDiffIndex
|
>>= runfilter associatedFilesFilter
|
||||||
>>= runfilter (withKeysReferencedDiffGitRefs refspec)
|
>>= runbloomfilter (withKeysReferencedDiffGitRefs refspec)
|
||||||
where
|
where
|
||||||
runfilter _ [] = return [] -- optimisation
|
runfilter _ [] = return [] -- optimisation
|
||||||
runfilter a l = bloomFilter l <$> genBloomFilter a
|
runfilter a l = a l
|
||||||
|
runbloomfilter a = runfilter $ \l -> bloomFilter l <$> genBloomFilter a
|
||||||
|
|
||||||
{- Given an initial value, folds it with each key referenced by
|
{- Given an initial value, folds it with each key referenced by
|
||||||
- files in the working tree. -}
|
- files in the working tree. -}
|
||||||
|
@ -269,6 +278,24 @@ withKeysReferencedDiff a getdiff extractsha = do
|
||||||
(parseLinkOrPointer <$> catObject sha)
|
(parseLinkOrPointer <$> catObject sha)
|
||||||
>>= maybe noop a
|
>>= maybe noop a
|
||||||
|
|
||||||
|
{- Filters out keys that have an associated file that's not modified. -}
|
||||||
|
associatedFilesFilter :: [Key] -> Annex [Key]
|
||||||
|
associatedFilesFilter = filterM go
|
||||||
|
where
|
||||||
|
go k = do
|
||||||
|
cs <- Database.Keys.getInodeCaches k
|
||||||
|
if null cs
|
||||||
|
then return True
|
||||||
|
else checkunmodified cs
|
||||||
|
=<< Database.Keys.getAssociatedFiles k
|
||||||
|
checkunmodified _ [] = return True
|
||||||
|
checkunmodified cs (f:fs) = do
|
||||||
|
relf <- fromRepo $ fromTopFilePath f
|
||||||
|
ifM (sameInodeCache relf cs)
|
||||||
|
( return False
|
||||||
|
, checkunmodified cs fs
|
||||||
|
)
|
||||||
|
|
||||||
data UnusedMaps = UnusedMaps
|
data UnusedMaps = UnusedMaps
|
||||||
{ unusedMap :: UnusedMap
|
{ unusedMap :: UnusedMap
|
||||||
, unusedBadMap :: UnusedMap
|
, unusedBadMap :: UnusedMap
|
||||||
|
|
52
Test.hs
52
Test.hs
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex test suite
|
{- git-annex test suite
|
||||||
-
|
-
|
||||||
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
|
- Copyright 2010-2016 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -369,9 +369,7 @@ test_reinject = intmpclonerepoInDirect $ do
|
||||||
git_annex "drop" ["--force", sha1annexedfile] @? "drop failed"
|
git_annex "drop" ["--force", sha1annexedfile] @? "drop failed"
|
||||||
annexed_notpresent sha1annexedfile
|
annexed_notpresent sha1annexedfile
|
||||||
writeFile tmp $ content sha1annexedfile
|
writeFile tmp $ content sha1annexedfile
|
||||||
r <- annexeval $ Types.Backend.getKey backendSHA1
|
key <- Types.Key.key2file <$> getKey backendSHA1 tmp
|
||||||
Types.KeySource.KeySource { Types.KeySource.keyFilename = tmp, Types.KeySource.contentLocation = tmp, Types.KeySource.inodeCache = Nothing }
|
|
||||||
let key = Types.Key.key2file $ fromJust r
|
|
||||||
git_annex "reinject" [tmp, sha1annexedfile] @? "reinject failed"
|
git_annex "reinject" [tmp, sha1annexedfile] @? "reinject failed"
|
||||||
annexed_present sha1annexedfile
|
annexed_present sha1annexedfile
|
||||||
git_annex "fromkey" [key, sha1annexedfiledup] @? "fromkey failed for dup"
|
git_annex "fromkey" [key, sha1annexedfiledup] @? "fromkey failed for dup"
|
||||||
|
@ -789,11 +787,10 @@ test_unused :: Assertion
|
||||||
-- This test is broken in direct mode
|
-- This test is broken in direct mode
|
||||||
test_unused = intmpclonerepoInDirect $ do
|
test_unused = intmpclonerepoInDirect $ do
|
||||||
checkunused [] "in new clone"
|
checkunused [] "in new clone"
|
||||||
-- keys have to be looked up before files are removed
|
|
||||||
annexedfilekey <- annexeval $ findkey annexedfile
|
|
||||||
sha1annexedfilekey <- annexeval $ findkey sha1annexedfile
|
|
||||||
git_annex "get" [annexedfile] @? "get of file failed"
|
git_annex "get" [annexedfile] @? "get of file failed"
|
||||||
git_annex "get" [sha1annexedfile] @? "get of file failed"
|
git_annex "get" [sha1annexedfile] @? "get of file failed"
|
||||||
|
annexedfilekey <- getKey backendSHA256E annexedfile
|
||||||
|
sha1annexedfilekey <- getKey backendSHA1 sha1annexedfile
|
||||||
checkunused [] "after get"
|
checkunused [] "after get"
|
||||||
boolSystem "git" [Param "rm", Param "-fq", File annexedfile] @? "git rm failed"
|
boolSystem "git" [Param "rm", Param "-fq", File annexedfile] @? "git rm failed"
|
||||||
checkunused [] "after rm"
|
checkunused [] "after rm"
|
||||||
|
@ -820,7 +817,7 @@ test_unused = intmpclonerepoInDirect $ do
|
||||||
-- and pointed at annexed content, and think that content was unused
|
-- and pointed at annexed content, and think that content was unused
|
||||||
writeFile "unusedfile" "unusedcontent"
|
writeFile "unusedfile" "unusedcontent"
|
||||||
git_annex "add" ["unusedfile"] @? "add of unusedfile failed"
|
git_annex "add" ["unusedfile"] @? "add of unusedfile failed"
|
||||||
unusedfilekey <- annexeval $ findkey "unusedfile"
|
unusedfilekey <- getKey backendSHA256E "unusedfile"
|
||||||
renameFile "unusedfile" "unusedunstagedfile"
|
renameFile "unusedfile" "unusedunstagedfile"
|
||||||
boolSystem "git" [Param "rm", Param "-qf", File "unusedfile"] @? "git rm failed"
|
boolSystem "git" [Param "rm", Param "-qf", File "unusedfile"] @? "git rm failed"
|
||||||
checkunused [] "with unstaged link"
|
checkunused [] "with unstaged link"
|
||||||
|
@ -832,7 +829,7 @@ test_unused = intmpclonerepoInDirect $ do
|
||||||
writeFile "unusedfile" "unusedcontent"
|
writeFile "unusedfile" "unusedcontent"
|
||||||
git_annex "add" ["unusedfile"] @? "add of unusedfile failed"
|
git_annex "add" ["unusedfile"] @? "add of unusedfile failed"
|
||||||
boolSystem "git" [Param "add", File "unusedfile"] @? "git add failed"
|
boolSystem "git" [Param "add", File "unusedfile"] @? "git add failed"
|
||||||
unusedfilekey' <- annexeval $ findkey "unusedfile"
|
unusedfilekey' <- getKey backendSHA256E "unusedfile"
|
||||||
checkunused [] "with staged deleted link"
|
checkunused [] "with staged deleted link"
|
||||||
boolSystem "git" [Param "rm", Param "-qf", File "unusedfile"] @? "git rm failed"
|
boolSystem "git" [Param "rm", Param "-qf", File "unusedfile"] @? "git rm failed"
|
||||||
checkunused [unusedfilekey'] "with staged link deleted"
|
checkunused [unusedfilekey'] "with staged link deleted"
|
||||||
|
@ -846,6 +843,27 @@ test_unused = intmpclonerepoInDirect $ do
|
||||||
removeFile "unusedfile"
|
removeFile "unusedfile"
|
||||||
checkunused [] "with staged deleted file"
|
checkunused [] "with staged deleted file"
|
||||||
|
|
||||||
|
-- When an unlocked file is modified, git diff will cause git-annex
|
||||||
|
-- to add its content to the repository. Make sure that's not
|
||||||
|
-- found as unused.
|
||||||
|
whenM (unlockedFiles <$> getTestMode) $ do
|
||||||
|
let f = "unlockedfile"
|
||||||
|
writeFile f "unlockedcontent1"
|
||||||
|
boolSystem "git" [Param "add", File "unlockedfile"] @? "git add failed"
|
||||||
|
checkunused [] "with unlocked file before modification"
|
||||||
|
writeFile f "unlockedcontent2"
|
||||||
|
checkunused [] "with unlocked file after modification"
|
||||||
|
not <$> boolSystem "git" [Param "diff", Param "--quiet", File f] @? "git diff did not show changes to unlocked file"
|
||||||
|
ver2key <- getKey backendSHA256E "unlockedfile"
|
||||||
|
-- still nothing unused because one version is in the index
|
||||||
|
-- and the other is in the work tree
|
||||||
|
checkunused [] "with unlocked file after git diff"
|
||||||
|
writeFile f "unlockedcontent3"
|
||||||
|
-- original version is still in index; version 2 is unused
|
||||||
|
-- now, and version 3 is in work tree
|
||||||
|
checkunused [ver2key] "with unlocked file after second modification"
|
||||||
|
not <$> boolSystem "git" [Param "diff", Param "--quiet", File f] @? "git diff did not show changes to unlocked file"
|
||||||
|
checkunused [ver2key] "with unlocked file after second git diff"
|
||||||
where
|
where
|
||||||
checkunused expectedkeys desc = do
|
checkunused expectedkeys desc = do
|
||||||
git_annex "unused" [] @? "unused failed"
|
git_annex "unused" [] @? "unused failed"
|
||||||
|
@ -853,9 +871,6 @@ test_unused = intmpclonerepoInDirect $ do
|
||||||
let unusedkeys = M.elems unusedmap
|
let unusedkeys = M.elems unusedmap
|
||||||
assertEqual ("unused keys differ " ++ desc)
|
assertEqual ("unused keys differ " ++ desc)
|
||||||
(sort expectedkeys) (sort unusedkeys)
|
(sort expectedkeys) (sort unusedkeys)
|
||||||
findkey f = do
|
|
||||||
r <- Annex.WorkTree.lookupFile f
|
|
||||||
return $ fromJust r
|
|
||||||
|
|
||||||
test_describe :: Assertion
|
test_describe :: Assertion
|
||||||
test_describe = intmpclonerepo $ do
|
test_describe = intmpclonerepo $ do
|
||||||
|
@ -1976,10 +1991,23 @@ backendSHA1 = backend_ "SHA1"
|
||||||
backendSHA256 :: Types.Backend
|
backendSHA256 :: Types.Backend
|
||||||
backendSHA256 = backend_ "SHA256"
|
backendSHA256 = backend_ "SHA256"
|
||||||
|
|
||||||
|
backendSHA256E :: Types.Backend
|
||||||
|
backendSHA256E = backend_ "SHA256E"
|
||||||
|
|
||||||
backendWORM :: Types.Backend
|
backendWORM :: Types.Backend
|
||||||
backendWORM = backend_ "WORM"
|
backendWORM = backend_ "WORM"
|
||||||
|
|
||||||
backend_ :: String -> Types.Backend
|
backend_ :: String -> Types.Backend
|
||||||
backend_ = Backend.lookupBackendName
|
backend_ = Backend.lookupBackendName
|
||||||
|
|
||||||
|
getKey :: Types.Backend -> FilePath -> IO Types.Key
|
||||||
|
getKey b f = fromJust <$> annexeval go
|
||||||
|
where
|
||||||
|
go = Types.Backend.getKey b
|
||||||
|
Types.KeySource.KeySource
|
||||||
|
{ Types.KeySource.keyFilename = f
|
||||||
|
, Types.KeySource.contentLocation = f
|
||||||
|
, Types.KeySource.inodeCache = Nothing
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue