unused/dropunused: support --from
This commit is contained in:
parent
09a16176de
commit
868300d4c1
6 changed files with 147 additions and 44 deletions
|
@ -19,6 +19,8 @@ import Messages
|
|||
import Locations
|
||||
import qualified Annex
|
||||
import qualified Command.Drop
|
||||
import qualified Command.Move
|
||||
import qualified Remote
|
||||
import Backend
|
||||
import Key
|
||||
|
||||
|
@ -40,15 +42,28 @@ start m s = notBareRepo $ do
|
|||
case M.lookup s m of
|
||||
Nothing -> return Nothing
|
||||
Just key -> do
|
||||
g <- Annex.gitRepo
|
||||
showStart "dropunused" s
|
||||
backend <- keyBackend key
|
||||
-- drop both content in the backend and any tmp
|
||||
-- file for the key
|
||||
let tmp = gitAnnexTmpLocation g key
|
||||
tmp_exists <- liftIO $ doesFileExist tmp
|
||||
when tmp_exists $ liftIO $ removeFile tmp
|
||||
return $ Just $ Command.Drop.perform key backend (Just 0)
|
||||
from <- Annex.getState Annex.fromremote
|
||||
case from of
|
||||
Just name -> do
|
||||
r <- Remote.byName name
|
||||
return $ Just $ performRemote r key
|
||||
_ -> return $ Just $ perform key
|
||||
|
||||
{- drop both content in the backend and any tmp file for the key -}
|
||||
perform :: Key -> CommandPerform
|
||||
perform key = do
|
||||
g <- Annex.gitRepo
|
||||
let tmp = gitAnnexTmpLocation g key
|
||||
tmp_exists <- liftIO $ doesFileExist tmp
|
||||
when tmp_exists $ liftIO $ removeFile tmp
|
||||
backend <- keyBackend key
|
||||
Command.Drop.perform key backend (Just 0) -- force drop
|
||||
|
||||
performRemote :: Remote.Remote Annex -> Key -> CommandPerform
|
||||
performRemote r key = do
|
||||
showNote $ "from " ++ Remote.name r ++ "..."
|
||||
return $ Just $ Command.Move.fromCleanup r True key
|
||||
|
||||
readUnusedLog :: Annex (M.Map String Key)
|
||||
readUnusedLog = do
|
||||
|
|
|
@ -20,9 +20,11 @@ import Content
|
|||
import Messages
|
||||
import Locations
|
||||
import Utility
|
||||
import LocationLog
|
||||
import qualified Annex
|
||||
import qualified GitRepo as Git
|
||||
import qualified Backend
|
||||
import qualified Remote
|
||||
|
||||
command :: [Command]
|
||||
command = [repoCommand "unused" paramNothing seek
|
||||
|
@ -39,35 +41,54 @@ start = notBareRepo $ do
|
|||
|
||||
perform :: CommandPerform
|
||||
perform = do
|
||||
_ <- checkUnused
|
||||
from <- Annex.getState Annex.fromremote
|
||||
case from of
|
||||
Just name -> do
|
||||
r <- Remote.byName name
|
||||
checkRemoteUnused r
|
||||
_ -> checkUnused
|
||||
return $ Just $ return True
|
||||
|
||||
checkUnused :: Annex Bool
|
||||
checkUnused :: Annex ()
|
||||
checkUnused = do
|
||||
(unused, staletmp) <- unusedKeys
|
||||
let unusedlist = number 0 unused
|
||||
let staletmplist = number (length unused) staletmp
|
||||
let list = unusedlist ++ staletmplist
|
||||
g <- Annex.gitRepo
|
||||
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $
|
||||
map (\(n, k) -> show n ++ " " ++ show k) list
|
||||
unless (null unused) $ showLongNote $ unusedmsg unusedlist
|
||||
unless (null staletmp) $ showLongNote $ staletmpmsg staletmplist
|
||||
writeUnusedFile list
|
||||
unless (null unused) $ showLongNote $ unusedMsg unusedlist
|
||||
unless (null staletmp) $ showLongNote $ staleTmpMsg staletmplist
|
||||
unless (null list) $ showLongNote $ "\n"
|
||||
return $ null list
|
||||
|
||||
checkRemoteUnused :: Remote.Remote Annex -> Annex ()
|
||||
checkRemoteUnused r = do
|
||||
g <- Annex.gitRepo
|
||||
showNote $ "checking for unused data on " ++ Remote.name r ++ "..."
|
||||
referenced <- getKeysReferenced
|
||||
logged <- liftIO $ loggedKeys g
|
||||
remotehas <- filterM isthere logged
|
||||
let remoteunused = remotehas `exclude` referenced
|
||||
let list = number 0 remoteunused
|
||||
writeUnusedFile list
|
||||
unless (null remoteunused) $ do
|
||||
showLongNote $ remoteUnusedMsg r list
|
||||
showLongNote $ "\n"
|
||||
where
|
||||
isthere k = do
|
||||
g <- Annex.gitRepo
|
||||
us <- liftIO $ keyLocations g k
|
||||
return $ uuid `elem` us
|
||||
uuid = Remote.uuid r
|
||||
|
||||
writeUnusedFile :: [(Int, Key)] -> Annex ()
|
||||
writeUnusedFile l = do
|
||||
g <- Annex.gitRepo
|
||||
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $
|
||||
unlines $ map (\(n, k) -> show n ++ " " ++ show k) l
|
||||
|
||||
table :: [(Int, Key)] -> [String]
|
||||
table l = [" NUMBER KEY"] ++ map cols l
|
||||
where
|
||||
unusedmsg u = unlines $
|
||||
["Some annexed data is no longer pointed to by any files in the repository:"]
|
||||
++ table u ++
|
||||
["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
|
||||
dropmsg
|
||||
staletmpmsg t = unlines $
|
||||
["Some partially transferred data exists in temporary files:"]
|
||||
++ table t ++ dropmsg
|
||||
dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
|
||||
|
||||
table l = [" NUMBER KEY"] ++ map cols l
|
||||
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
|
||||
pad n s = s ++ replicate (n - length s) ' '
|
||||
|
||||
|
@ -75,6 +96,39 @@ number :: Int -> [a] -> [(Int, a)]
|
|||
number _ [] = []
|
||||
number n (x:xs) = (n+1, x):(number (n+1) xs)
|
||||
|
||||
staleTmpMsg :: [(Int, Key)] -> String
|
||||
staleTmpMsg t = unlines $
|
||||
["Some partially transferred data exists in temporary files:"]
|
||||
++ table t ++ [dropMsg Nothing]
|
||||
|
||||
unusedMsg :: [(Int, Key)] -> String
|
||||
unusedMsg u = unusedMsg' u
|
||||
["Some annexed data is no longer used by any files in the repository:"]
|
||||
[dropMsg Nothing]
|
||||
|
||||
remoteUnusedMsg :: Remote.Remote Annex -> [(Int, Key)] -> String
|
||||
remoteUnusedMsg r u = unusedMsg' u
|
||||
["Some annexed data on " ++ name ++
|
||||
" is not used by any files in this repository."]
|
||||
[dropMsg $ Just r,
|
||||
"Please be cautious -- are you sure that the remote repository",
|
||||
"does not use this data?"]
|
||||
where
|
||||
name = Remote.name r
|
||||
|
||||
unusedMsg' :: [(Int, Key)] -> [String] -> [String] -> String
|
||||
unusedMsg' u header trailer = unlines $
|
||||
header ++
|
||||
table u ++
|
||||
["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
|
||||
trailer
|
||||
|
||||
dropMsg :: Maybe (Remote.Remote Annex) -> String
|
||||
dropMsg Nothing = dropMsg' ""
|
||||
dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r
|
||||
dropMsg' :: String -> String
|
||||
dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)"
|
||||
|
||||
{- Finds keys whose content is present, but that do not seem to be used
|
||||
- by any files in the git repo, or that are only present as tmp files. -}
|
||||
unusedKeys :: Annex ([Key], [Key])
|
||||
|
@ -93,7 +147,9 @@ unusedKeys = do
|
|||
referenced <- getKeysReferenced
|
||||
tmps <- tmpKeys
|
||||
|
||||
let (unused, staletmp, duptmp) = calcUnusedKeys present referenced tmps
|
||||
let unused = present `exclude` referenced
|
||||
let staletmp = tmps `exclude` present
|
||||
let duptmp = tmps `exclude` staletmp
|
||||
|
||||
-- Tmp files that are dups of content already present
|
||||
-- can simply be removed.
|
||||
|
@ -102,18 +158,16 @@ unusedKeys = do
|
|||
|
||||
return (unused, staletmp)
|
||||
|
||||
calcUnusedKeys :: [Key] -> [Key] -> [Key] -> ([Key], [Key], [Key])
|
||||
calcUnusedKeys present referenced tmps = (unused, staletmp, duptmp)
|
||||
{- Finds items in the first, smaller list, that are not
|
||||
- present in the second, larger list.
|
||||
-
|
||||
- Constructing a single set, of the list that tends to be
|
||||
- smaller, appears more efficient in both memory and CPU
|
||||
- than constructing and taking the S.difference of two sets. -}
|
||||
exclude :: Ord a => [a] -> [a] -> [a]
|
||||
exclude [] _ = [] -- optimisation
|
||||
exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
|
||||
where
|
||||
unused = present `exclude` referenced
|
||||
staletmp = tmps `exclude` present
|
||||
duptmp = tmps `exclude` staletmp
|
||||
|
||||
-- Constructing a single set, of the list that tends to be
|
||||
-- smaller, appears more efficient in both memory and CPU
|
||||
-- than constructing and taking the S.difference of two sets.
|
||||
exclude [] _ = [] -- optimisation
|
||||
exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
|
||||
remove a b = foldl (flip S.delete) b a
|
||||
|
||||
{- List of keys referenced by symlinks in the git repo. -}
|
||||
|
|
3
debian/changelog
vendored
3
debian/changelog
vendored
|
@ -4,6 +4,9 @@ git-annex (0.20110402) UNRELEASED; urgency=low
|
|||
some issues with git on OSX with the mixed-case directories.
|
||||
No migration is needed; the old mixed case hash directories are still
|
||||
read; new information is written to the new directories.
|
||||
* Unused files on remotes, particulary special remotes, can now be
|
||||
identified and dropped, by using "--from remote" with git annex unused
|
||||
and git annex dropunused.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400
|
||||
|
||||
|
|
|
@ -155,16 +155,21 @@ Many git-annex commands will stage changes for later `git commit` by you.
|
|||
|
||||
* unused
|
||||
|
||||
Checks the annex for data that is not used by any files currently
|
||||
in the annex, and prints a numbered list of the data.
|
||||
Checks the annex for data that does not correspond to any files currently
|
||||
in the respository, and prints a numbered list of the data.
|
||||
|
||||
To only show unused temp files, specify --fast
|
||||
|
||||
To check data on a remote that does not correspond to any files currently
|
||||
in the local repository, specify --from.
|
||||
|
||||
* dropunused [number ...]
|
||||
|
||||
Drops the data corresponding to the numbers, as listed by the last
|
||||
`git annex unused`
|
||||
|
||||
To drop the data from a remote, specify --from.
|
||||
|
||||
* find [path ...]
|
||||
|
||||
Outputs a list of annexed files whose content is currently present.
|
||||
|
@ -317,12 +322,15 @@ Many git-annex commands will stage changes for later `git commit` by you.
|
|||
|
||||
* --from=repository
|
||||
|
||||
Specifies a repository that content will be retrieved from.
|
||||
Specifies a repository that content will be retrieved from, or that
|
||||
should otherwise be acted on.
|
||||
|
||||
It should be specified using the name of a configured remote.
|
||||
|
||||
* --to=repository
|
||||
|
||||
Specifies a repository that content will be sent to.
|
||||
Specifies a repository that content will be sent to.
|
||||
|
||||
It should be specified using the name of a configured remote.
|
||||
|
||||
* --exclude=glob
|
||||
|
|
|
@ -8,3 +8,26 @@ They cannot be used by other git commands though.
|
|||
|
||||
* [[Amazon_S3]]
|
||||
* [[directory]]
|
||||
|
||||
## Unused content on special remotes
|
||||
|
||||
Over time, special remotes can accumulate file content that is no longer
|
||||
referred to by files in git. Normally, unused content in the current
|
||||
repository is found by running `git annex unused`. To detect unused content
|
||||
on special remotes, instead use `git annex unused --from`. Example:
|
||||
|
||||
$ git annex unused --from mys3
|
||||
unused (checking for unused data on mys3...)
|
||||
Some annexed data on mys3 is not used by any files in this repository.
|
||||
NUMBER KEY
|
||||
1 WORM-s3-m1301674316--foo
|
||||
(To see where data was previously used, try: git log --stat -S'KEY')
|
||||
(To remove unwanted data: git-annex dropunused --from mys3 NUMBER)
|
||||
Please be cautious -- are you sure that the remote repository
|
||||
does not use this data?
|
||||
$ git annex dropunused --from mys3 1
|
||||
dropunused 12948 (from mys3...) ok
|
||||
|
||||
Do be cautious when using this; it cannot detect if content in a remote
|
||||
is used by that remote, or is the last copy of data that is used by
|
||||
some *other* remote.
|
||||
|
|
|
@ -10,7 +10,7 @@ eliminate it to save space.
|
|||
|
||||
# git annex unused
|
||||
unused (checking for unused data...)
|
||||
Some annexed data is no longer pointed to by any files in the repository.
|
||||
Some annexed data is no longer used by any files in the repository.
|
||||
NUMBER KEY
|
||||
1 WORM-s3-m1289672605--file
|
||||
2 WORM-s14-m1289672605--file
|
||||
|
|
Loading…
Reference in a new issue