unused: Add --used option, which can specify a set of refs to consider used, rather than the default of considering all refs used.

This commit is contained in:
Joey Hess 2015-05-14 15:31:38 -04:00
parent a2fd8be337
commit 86699ff861
6 changed files with 116 additions and 25 deletions

View file

@ -95,6 +95,8 @@ paramFile :: String
paramFile = "FILE"
paramRef :: String
paramRef = "REF"
paramRefSpec :: String
paramRefSpec = "REFSPEC"
paramGroup :: String
paramGroup = "GROUP"
paramExpression :: String

View file

@ -31,34 +31,41 @@ import qualified Remote
import qualified Annex.Branch
import Annex.CatFile
import Types.Key
import Types.RefSpec
import Git.FilePath
import Logs.View (is_branchView)
import Utility.Bloom
cmd :: [Command]
cmd = [withOptions [unusedFromOption] $ command "unused" paramNothing seek
SectionMaintenance "look for unused file content"]
cmd = [withOptions [unusedFromOption, refSpecOption] $
command "unused" paramNothing seek
SectionMaintenance "look for unused file content"]
unusedFromOption :: Option
unusedFromOption = fieldOption ['f'] "from" paramRemote "remote to check for unused content"
refSpecOption :: Option
refSpecOption = fieldOption [] "used-refspec" paramRefSpec "refs to consider used (default: all refs)"
seek :: CommandSeek
seek = withNothing start
{- Finds unused content in the annex. -}
start :: CommandStart
start = do
from <- Annex.getField $ optionName unusedFromOption
!refspec <- maybe allRefSpec (either error id . parseRefSpec)
<$> Annex.getField (optionName refSpecOption)
from <- Annex.getField (optionName unusedFromOption)
let (name, action) = case from of
Nothing -> (".", checkUnused)
Just "." -> (".", checkUnused)
Just "here" -> (".", checkUnused)
Just n -> (n, checkRemoteUnused n)
Nothing -> (".", checkUnused refspec)
Just "." -> (".", checkUnused refspec)
Just "here" -> (".", checkUnused refspec)
Just n -> (n, checkRemoteUnused n refspec)
showStart "unused" name
next action
checkUnused :: CommandPerform
checkUnused = chain 0
checkUnused :: RefSpec -> CommandPerform
checkUnused refspec = chain 0
[ check "" unusedMsg $ findunused =<< Annex.getState Annex.fast
, check "bad" staleBadMsg $ staleKeysPrune gitAnnexBadDir False
, check "tmp" staleTmpMsg $ staleKeysPrune gitAnnexTmpObjectDir True
@ -71,20 +78,20 @@ checkUnused = chain 0
showAction "checking for unused data"
-- InAnnex, not InRepository because if a direct mode
-- file exists, it is obviously not unused.
excludeReferenced =<< getKeysPresent InAnnex
excludeReferenced refspec =<< getKeysPresent InAnnex
chain _ [] = next $ return True
chain v (a:as) = do
v' <- a v
chain v' as
checkRemoteUnused :: String -> CommandPerform
checkRemoteUnused name = go =<< fromJust <$> Remote.byNameWithUUID (Just name)
checkRemoteUnused :: String -> RefSpec -> CommandPerform
checkRemoteUnused name refspec = go =<< fromJust <$> Remote.byNameWithUUID (Just name)
where
go r = do
showAction "checking for unused data"
_ <- check "" (remoteUnusedMsg r) (remoteunused r) 0
next $ return True
remoteunused r = excludeReferenced <=< loggedKeysFor $ Remote.uuid r
remoteunused r = excludeReferenced refspec <=< loggedKeysFor $ Remote.uuid r
check :: FilePath -> ([(Int, Key)] -> String) -> Annex [Key] -> Int -> Annex Int
check file msg a c = do
@ -145,7 +152,7 @@ dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\
- * Build a bloom filter of all keys referenced by symlinks. This
- is the fastest one to build and will filter out most keys.
- * If keys remain, build a second bloom filter of keys referenced by
- all branches.
- branches maching the RefSpec.
- * The list is streamed through these bloom filters lazily, so both will
- exist at the same time. This means that twice the memory is used,
- but they're relatively small, so the added complexity of using a
@ -157,13 +164,13 @@ dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\
- Short-circuiting if the first filter filters all the keys handles the
- other common case.
-}
excludeReferenced :: [Key] -> Annex [Key]
excludeReferenced ks = runfilter firstlevel ks >>= runfilter secondlevel
excludeReferenced :: RefSpec -> [Key] -> Annex [Key]
excludeReferenced refspec ks = runfilter firstlevel ks >>= runfilter secondlevel
where
runfilter _ [] = return [] -- optimisation
runfilter a l = bloomFilter show l <$> genBloomFilter show a
firstlevel = withKeysReferencedM
secondlevel = withKeysReferencedInGit
secondlevel = withKeysReferencedInGit refspec
{- Finds items in the first, smaller list, that are not
- present in the second, larger list.
@ -258,14 +265,15 @@ withKeysReferenced' mdir initial a = do
!v' <- a k f v
go v' fs
withKeysReferencedInGit :: (Key -> Annex ()) -> Annex ()
withKeysReferencedInGit a = do
withKeysReferencedInGit :: RefSpec -> (Key -> Annex ()) -> Annex ()
withKeysReferencedInGit refspec a = do
current <- inRepo Git.Branch.currentUnsafe
shaHead <- maybe (return Nothing) (inRepo . Git.Ref.sha) current
showref >>= mapM_ (withKeysReferencedInGitRef a) .
relevantrefs (shaHead, current)
usedrefs <- applyRefSpec refspec . relevantrefs (shaHead, current)
<$> inRepo (Git.Command.pipeReadStrict [Param "show-ref"])
forM_ usedrefs $
withKeysReferencedInGitRef a
where
showref = inRepo $ Git.Command.pipeReadStrict [Param "show-ref"]
relevantrefs headRef = addHead headRef .
filter ourbranches .
map (separate (== ' ')) .
@ -293,8 +301,8 @@ withKeysReferencedInGitRef a ref = do
showAction $ "checking " ++ Git.Ref.describe ref
bare <- isBareRepo
(ts,clean) <- inRepo $ if bare
then DiffTree.diffIndex ref
else DiffTree.diffWorkTree ref
then DiffTree.diffIndex ref
else DiffTree.diffWorkTree ref
let lookAtWorkingTree = not bare && ref == Git.Ref.headRef
forM_ ts $ tKey lookAtWorkingTree >=> maybe noop a
liftIO $ void clean

44
Types/RefSpec.hs Normal file
View file

@ -0,0 +1,44 @@
{- This is not the same as git's fetch/push refspecs.
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Types.RefSpec where
import Common
import Utility.Glob
import Git.Types
import Data.Either
type RefSpec = [RefSpecPart]
data RefSpecPart = AddRef Ref | AddMatching Glob | RemoveMatching Glob
allRefSpec :: RefSpec
allRefSpec = [AddMatching $ compileGlob "*" CaseSensative]
parseRefSpec :: String -> Either String RefSpec
parseRefSpec v = case partitionEithers (map mk $ split ":" v) of
([],refspec) -> Right refspec
(e:_,_) -> Left e
where
mk ('+':s)
| any (`elem` s) "*?" =
Right $ AddMatching $ compileGlob s CaseSensative
| otherwise = Right $ AddRef $ Ref s
mk ('-':s) = Right $ RemoveMatching $ compileGlob s CaseSensative
mk s = Left $ "bad refspec item \"" ++ s ++ "\" (expected + or - prefix)"
applyRefSpec :: RefSpec -> [Ref] -> [Ref]
applyRefSpec refspec rs = go [] refspec
where
go c [] = reverse c
go c (AddRef r : rest) = go (r:c) rest
go c (AddMatching g : rest) =
let add = filter (matchGlob g . fromRef) rs
in go (add ++ c) rest
go c (RemoveMatching g : rest) =
go (filter (not . matchGlob g . fromRef) c) rest

2
debian/changelog vendored
View file

@ -18,6 +18,8 @@ git-annex (5.20150508.2) UNRELEASED; urgency=medium
running at once.
* Stale transfer lock and info files will be cleaned up automatically
when get/unused/info commands are run.
* unused: Add --used option, which can specify a set of refs to consider
used, rather than the default of considering all refs used.
-- Joey Hess <id@joeyh.name> Mon, 11 May 2015 12:45:06 -0400

View file

@ -26,7 +26,37 @@ For example, to move all unused data to origin:
* `--from=remote`
Check for unused data on a remote.
Check for unused data that is located on a remote.
* `--used-refspec=+ref:-ref`
By default, any data that the work tree uses, or that any refs in the git
repository point to is considered to be used. If you only want to use
some refs, you can use this option to specify the ones to use. Data that
is not in the specified refs (and not used by the work tree) will then be
considered unused.
# REFSPEC
The refspec format for --used-refspec is a colon-separated list of
additions and removals of refs. For example:
+refs/heads/*:+HEAD^:+refs/tags/*:-refs/tags/old-tag
This adds all refs/heads/ refs, as well as the previous version
of HEAD. It also adds all tags, except for old-tag.
This refspec is processed by starting with an empty set of refs,
and walking the list in order from left to right.
* Each + using a glob is matched against all relevant refs
(a subset of `git show-ref`) and all matching refs are added
to the set.
For example, "+refs/remotes/*" adds all remote refs.
* Each + without a glob adds the literal value to the set.
For example, "+HEAD^" adds "HEAD^".
* Each - is matched against the set of refs accumulated so far.
Any matching refs are removed from the set.
# SEE ALSO

View file

@ -33,4 +33,9 @@ refspec in order.
the SHAs that the refs point to, so -refs/heads/master does not remove
+HEAD).
Hmm, unused currently does a separate pass to find files used in the work
tree. I think it's best to keep that as-is.
--[[Joey]]
> [[done]] --[[Joey]]