unused/dropunused: support --from

This commit is contained in:
Joey Hess 2011-04-02 20:59:41 -04:00
parent 09a16176de
commit 868300d4c1
6 changed files with 147 additions and 44 deletions

View file

@ -19,6 +19,8 @@ import Messages
import Locations import Locations
import qualified Annex import qualified Annex
import qualified Command.Drop import qualified Command.Drop
import qualified Command.Move
import qualified Remote
import Backend import Backend
import Key import Key
@ -40,15 +42,28 @@ start m s = notBareRepo $ do
case M.lookup s m of case M.lookup s m of
Nothing -> return Nothing Nothing -> return Nothing
Just key -> do Just key -> do
g <- Annex.gitRepo
showStart "dropunused" s showStart "dropunused" s
backend <- keyBackend key from <- Annex.getState Annex.fromremote
-- drop both content in the backend and any tmp case from of
-- file for the key Just name -> do
r <- Remote.byName name
return $ Just $ performRemote r key
_ -> return $ Just $ perform key
{- drop both content in the backend and any tmp file for the key -}
perform :: Key -> CommandPerform
perform key = do
g <- Annex.gitRepo
let tmp = gitAnnexTmpLocation g key let tmp = gitAnnexTmpLocation g key
tmp_exists <- liftIO $ doesFileExist tmp tmp_exists <- liftIO $ doesFileExist tmp
when tmp_exists $ liftIO $ removeFile tmp when tmp_exists $ liftIO $ removeFile tmp
return $ Just $ Command.Drop.perform key backend (Just 0) backend <- keyBackend key
Command.Drop.perform key backend (Just 0) -- force drop
performRemote :: Remote.Remote Annex -> Key -> CommandPerform
performRemote r key = do
showNote $ "from " ++ Remote.name r ++ "..."
return $ Just $ Command.Move.fromCleanup r True key
readUnusedLog :: Annex (M.Map String Key) readUnusedLog :: Annex (M.Map String Key)
readUnusedLog = do readUnusedLog = do

View file

@ -20,9 +20,11 @@ import Content
import Messages import Messages
import Locations import Locations
import Utility import Utility
import LocationLog
import qualified Annex import qualified Annex
import qualified GitRepo as Git import qualified GitRepo as Git
import qualified Backend import qualified Backend
import qualified Remote
command :: [Command] command :: [Command]
command = [repoCommand "unused" paramNothing seek command = [repoCommand "unused" paramNothing seek
@ -39,35 +41,54 @@ start = notBareRepo $ do
perform :: CommandPerform perform :: CommandPerform
perform = do perform = do
_ <- checkUnused from <- Annex.getState Annex.fromremote
case from of
Just name -> do
r <- Remote.byName name
checkRemoteUnused r
_ -> checkUnused
return $ Just $ return True return $ Just $ return True
checkUnused :: Annex Bool checkUnused :: Annex ()
checkUnused = do checkUnused = do
(unused, staletmp) <- unusedKeys (unused, staletmp) <- unusedKeys
let unusedlist = number 0 unused let unusedlist = number 0 unused
let staletmplist = number (length unused) staletmp let staletmplist = number (length unused) staletmp
let list = unusedlist ++ staletmplist let list = unusedlist ++ staletmplist
g <- Annex.gitRepo writeUnusedFile list
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $ unless (null unused) $ showLongNote $ unusedMsg unusedlist
map (\(n, k) -> show n ++ " " ++ show k) list unless (null staletmp) $ showLongNote $ staleTmpMsg staletmplist
unless (null unused) $ showLongNote $ unusedmsg unusedlist
unless (null staletmp) $ showLongNote $ staletmpmsg staletmplist
unless (null list) $ showLongNote $ "\n" unless (null list) $ showLongNote $ "\n"
return $ null list
checkRemoteUnused :: Remote.Remote Annex -> Annex ()
checkRemoteUnused r = do
g <- Annex.gitRepo
showNote $ "checking for unused data on " ++ Remote.name r ++ "..."
referenced <- getKeysReferenced
logged <- liftIO $ loggedKeys g
remotehas <- filterM isthere logged
let remoteunused = remotehas `exclude` referenced
let list = number 0 remoteunused
writeUnusedFile list
unless (null remoteunused) $ do
showLongNote $ remoteUnusedMsg r list
showLongNote $ "\n"
where where
unusedmsg u = unlines $ isthere k = do
["Some annexed data is no longer pointed to by any files in the repository:"] g <- Annex.gitRepo
++ table u ++ us <- liftIO $ keyLocations g k
["(To see where data was previously used, try: git log --stat -S'KEY')"] ++ return $ uuid `elem` us
dropmsg uuid = Remote.uuid r
staletmpmsg t = unlines $
["Some partially transferred data exists in temporary files:"]
++ table t ++ dropmsg
dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
writeUnusedFile :: [(Int, Key)] -> Annex ()
writeUnusedFile l = do
g <- Annex.gitRepo
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $
unlines $ map (\(n, k) -> show n ++ " " ++ show k) l
table :: [(Int, Key)] -> [String]
table l = [" NUMBER KEY"] ++ map cols l table l = [" NUMBER KEY"] ++ map cols l
where
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
pad n s = s ++ replicate (n - length s) ' ' pad n s = s ++ replicate (n - length s) ' '
@ -75,6 +96,39 @@ number :: Int -> [a] -> [(Int, a)]
number _ [] = [] number _ [] = []
number n (x:xs) = (n+1, x):(number (n+1) xs) number n (x:xs) = (n+1, x):(number (n+1) xs)
staleTmpMsg :: [(Int, Key)] -> String
staleTmpMsg t = unlines $
["Some partially transferred data exists in temporary files:"]
++ table t ++ [dropMsg Nothing]
unusedMsg :: [(Int, Key)] -> String
unusedMsg u = unusedMsg' u
["Some annexed data is no longer used by any files in the repository:"]
[dropMsg Nothing]
remoteUnusedMsg :: Remote.Remote Annex -> [(Int, Key)] -> String
remoteUnusedMsg r u = unusedMsg' u
["Some annexed data on " ++ name ++
" is not used by any files in this repository."]
[dropMsg $ Just r,
"Please be cautious -- are you sure that the remote repository",
"does not use this data?"]
where
name = Remote.name r
unusedMsg' :: [(Int, Key)] -> [String] -> [String] -> String
unusedMsg' u header trailer = unlines $
header ++
table u ++
["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
trailer
dropMsg :: Maybe (Remote.Remote Annex) -> String
dropMsg Nothing = dropMsg' ""
dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r
dropMsg' :: String -> String
dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)"
{- Finds keys whose content is present, but that do not seem to be used {- Finds keys whose content is present, but that do not seem to be used
- by any files in the git repo, or that are only present as tmp files. -} - by any files in the git repo, or that are only present as tmp files. -}
unusedKeys :: Annex ([Key], [Key]) unusedKeys :: Annex ([Key], [Key])
@ -93,7 +147,9 @@ unusedKeys = do
referenced <- getKeysReferenced referenced <- getKeysReferenced
tmps <- tmpKeys tmps <- tmpKeys
let (unused, staletmp, duptmp) = calcUnusedKeys present referenced tmps let unused = present `exclude` referenced
let staletmp = tmps `exclude` present
let duptmp = tmps `exclude` staletmp
-- Tmp files that are dups of content already present -- Tmp files that are dups of content already present
-- can simply be removed. -- can simply be removed.
@ -102,18 +158,16 @@ unusedKeys = do
return (unused, staletmp) return (unused, staletmp)
calcUnusedKeys :: [Key] -> [Key] -> [Key] -> ([Key], [Key], [Key]) {- Finds items in the first, smaller list, that are not
calcUnusedKeys present referenced tmps = (unused, staletmp, duptmp) - present in the second, larger list.
where -
unused = present `exclude` referenced - Constructing a single set, of the list that tends to be
staletmp = tmps `exclude` present - smaller, appears more efficient in both memory and CPU
duptmp = tmps `exclude` staletmp - than constructing and taking the S.difference of two sets. -}
exclude :: Ord a => [a] -> [a] -> [a]
-- Constructing a single set, of the list that tends to be
-- smaller, appears more efficient in both memory and CPU
-- than constructing and taking the S.difference of two sets.
exclude [] _ = [] -- optimisation exclude [] _ = [] -- optimisation
exclude smaller larger = S.toList $ remove larger $ S.fromList smaller exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
where
remove a b = foldl (flip S.delete) b a remove a b = foldl (flip S.delete) b a
{- List of keys referenced by symlinks in the git repo. -} {- List of keys referenced by symlinks in the git repo. -}

3
debian/changelog vendored
View file

@ -4,6 +4,9 @@ git-annex (0.20110402) UNRELEASED; urgency=low
some issues with git on OSX with the mixed-case directories. some issues with git on OSX with the mixed-case directories.
No migration is needed; the old mixed case hash directories are still No migration is needed; the old mixed case hash directories are still
read; new information is written to the new directories. read; new information is written to the new directories.
* Unused files on remotes, particulary special remotes, can now be
identified and dropped, by using "--from remote" with git annex unused
and git annex dropunused.
-- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400 -- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400

View file

@ -155,16 +155,21 @@ Many git-annex commands will stage changes for later `git commit` by you.
* unused * unused
Checks the annex for data that is not used by any files currently Checks the annex for data that does not correspond to any files currently
in the annex, and prints a numbered list of the data. in the respository, and prints a numbered list of the data.
To only show unused temp files, specify --fast To only show unused temp files, specify --fast
To check data on a remote that does not correspond to any files currently
in the local repository, specify --from.
* dropunused [number ...] * dropunused [number ...]
Drops the data corresponding to the numbers, as listed by the last Drops the data corresponding to the numbers, as listed by the last
`git annex unused` `git annex unused`
To drop the data from a remote, specify --from.
* find [path ...] * find [path ...]
Outputs a list of annexed files whose content is currently present. Outputs a list of annexed files whose content is currently present.
@ -317,12 +322,15 @@ Many git-annex commands will stage changes for later `git commit` by you.
* --from=repository * --from=repository
Specifies a repository that content will be retrieved from. Specifies a repository that content will be retrieved from, or that
should otherwise be acted on.
It should be specified using the name of a configured remote. It should be specified using the name of a configured remote.
* --to=repository * --to=repository
Specifies a repository that content will be sent to. Specifies a repository that content will be sent to.
It should be specified using the name of a configured remote. It should be specified using the name of a configured remote.
* --exclude=glob * --exclude=glob

View file

@ -8,3 +8,26 @@ They cannot be used by other git commands though.
* [[Amazon_S3]] * [[Amazon_S3]]
* [[directory]] * [[directory]]
## Unused content on special remotes
Over time, special remotes can accumulate file content that is no longer
referred to by files in git. Normally, unused content in the current
repository is found by running `git annex unused`. To detect unused content
on special remotes, instead use `git annex unused --from`. Example:
$ git annex unused --from mys3
unused (checking for unused data on mys3...)
Some annexed data on mys3 is not used by any files in this repository.
NUMBER KEY
1 WORM-s3-m1301674316--foo
(To see where data was previously used, try: git log --stat -S'KEY')
(To remove unwanted data: git-annex dropunused --from mys3 NUMBER)
Please be cautious -- are you sure that the remote repository
does not use this data?
$ git annex dropunused --from mys3 1
dropunused 12948 (from mys3...) ok
Do be cautious when using this; it cannot detect if content in a remote
is used by that remote, or is the last copy of data that is used by
some *other* remote.

View file

@ -10,7 +10,7 @@ eliminate it to save space.
# git annex unused # git annex unused
unused (checking for unused data...) unused (checking for unused data...)
Some annexed data is no longer pointed to by any files in the repository. Some annexed data is no longer used by any files in the repository.
NUMBER KEY NUMBER KEY
1 WORM-s3-m1289672605--file 1 WORM-s3-m1289672605--file
2 WORM-s14-m1289672605--file 2 WORM-s14-m1289672605--file