unused/dropunused: support --from
This commit is contained in:
parent
09a16176de
commit
868300d4c1
6 changed files with 147 additions and 44 deletions
|
@ -19,6 +19,8 @@ import Messages
|
||||||
import Locations
|
import Locations
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import qualified Command.Drop
|
import qualified Command.Drop
|
||||||
|
import qualified Command.Move
|
||||||
|
import qualified Remote
|
||||||
import Backend
|
import Backend
|
||||||
import Key
|
import Key
|
||||||
|
|
||||||
|
@ -40,15 +42,28 @@ start m s = notBareRepo $ do
|
||||||
case M.lookup s m of
|
case M.lookup s m of
|
||||||
Nothing -> return Nothing
|
Nothing -> return Nothing
|
||||||
Just key -> do
|
Just key -> do
|
||||||
g <- Annex.gitRepo
|
|
||||||
showStart "dropunused" s
|
showStart "dropunused" s
|
||||||
backend <- keyBackend key
|
from <- Annex.getState Annex.fromremote
|
||||||
-- drop both content in the backend and any tmp
|
case from of
|
||||||
-- file for the key
|
Just name -> do
|
||||||
|
r <- Remote.byName name
|
||||||
|
return $ Just $ performRemote r key
|
||||||
|
_ -> return $ Just $ perform key
|
||||||
|
|
||||||
|
{- drop both content in the backend and any tmp file for the key -}
|
||||||
|
perform :: Key -> CommandPerform
|
||||||
|
perform key = do
|
||||||
|
g <- Annex.gitRepo
|
||||||
let tmp = gitAnnexTmpLocation g key
|
let tmp = gitAnnexTmpLocation g key
|
||||||
tmp_exists <- liftIO $ doesFileExist tmp
|
tmp_exists <- liftIO $ doesFileExist tmp
|
||||||
when tmp_exists $ liftIO $ removeFile tmp
|
when tmp_exists $ liftIO $ removeFile tmp
|
||||||
return $ Just $ Command.Drop.perform key backend (Just 0)
|
backend <- keyBackend key
|
||||||
|
Command.Drop.perform key backend (Just 0) -- force drop
|
||||||
|
|
||||||
|
performRemote :: Remote.Remote Annex -> Key -> CommandPerform
|
||||||
|
performRemote r key = do
|
||||||
|
showNote $ "from " ++ Remote.name r ++ "..."
|
||||||
|
return $ Just $ Command.Move.fromCleanup r True key
|
||||||
|
|
||||||
readUnusedLog :: Annex (M.Map String Key)
|
readUnusedLog :: Annex (M.Map String Key)
|
||||||
readUnusedLog = do
|
readUnusedLog = do
|
||||||
|
|
|
@ -20,9 +20,11 @@ import Content
|
||||||
import Messages
|
import Messages
|
||||||
import Locations
|
import Locations
|
||||||
import Utility
|
import Utility
|
||||||
|
import LocationLog
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import qualified GitRepo as Git
|
import qualified GitRepo as Git
|
||||||
import qualified Backend
|
import qualified Backend
|
||||||
|
import qualified Remote
|
||||||
|
|
||||||
command :: [Command]
|
command :: [Command]
|
||||||
command = [repoCommand "unused" paramNothing seek
|
command = [repoCommand "unused" paramNothing seek
|
||||||
|
@ -39,35 +41,54 @@ start = notBareRepo $ do
|
||||||
|
|
||||||
perform :: CommandPerform
|
perform :: CommandPerform
|
||||||
perform = do
|
perform = do
|
||||||
_ <- checkUnused
|
from <- Annex.getState Annex.fromremote
|
||||||
|
case from of
|
||||||
|
Just name -> do
|
||||||
|
r <- Remote.byName name
|
||||||
|
checkRemoteUnused r
|
||||||
|
_ -> checkUnused
|
||||||
return $ Just $ return True
|
return $ Just $ return True
|
||||||
|
|
||||||
checkUnused :: Annex Bool
|
checkUnused :: Annex ()
|
||||||
checkUnused = do
|
checkUnused = do
|
||||||
(unused, staletmp) <- unusedKeys
|
(unused, staletmp) <- unusedKeys
|
||||||
let unusedlist = number 0 unused
|
let unusedlist = number 0 unused
|
||||||
let staletmplist = number (length unused) staletmp
|
let staletmplist = number (length unused) staletmp
|
||||||
let list = unusedlist ++ staletmplist
|
let list = unusedlist ++ staletmplist
|
||||||
g <- Annex.gitRepo
|
writeUnusedFile list
|
||||||
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $
|
unless (null unused) $ showLongNote $ unusedMsg unusedlist
|
||||||
map (\(n, k) -> show n ++ " " ++ show k) list
|
unless (null staletmp) $ showLongNote $ staleTmpMsg staletmplist
|
||||||
unless (null unused) $ showLongNote $ unusedmsg unusedlist
|
|
||||||
unless (null staletmp) $ showLongNote $ staletmpmsg staletmplist
|
|
||||||
unless (null list) $ showLongNote $ "\n"
|
unless (null list) $ showLongNote $ "\n"
|
||||||
return $ null list
|
|
||||||
|
|
||||||
|
checkRemoteUnused :: Remote.Remote Annex -> Annex ()
|
||||||
|
checkRemoteUnused r = do
|
||||||
|
g <- Annex.gitRepo
|
||||||
|
showNote $ "checking for unused data on " ++ Remote.name r ++ "..."
|
||||||
|
referenced <- getKeysReferenced
|
||||||
|
logged <- liftIO $ loggedKeys g
|
||||||
|
remotehas <- filterM isthere logged
|
||||||
|
let remoteunused = remotehas `exclude` referenced
|
||||||
|
let list = number 0 remoteunused
|
||||||
|
writeUnusedFile list
|
||||||
|
unless (null remoteunused) $ do
|
||||||
|
showLongNote $ remoteUnusedMsg r list
|
||||||
|
showLongNote $ "\n"
|
||||||
where
|
where
|
||||||
unusedmsg u = unlines $
|
isthere k = do
|
||||||
["Some annexed data is no longer pointed to by any files in the repository:"]
|
g <- Annex.gitRepo
|
||||||
++ table u ++
|
us <- liftIO $ keyLocations g k
|
||||||
["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
|
return $ uuid `elem` us
|
||||||
dropmsg
|
uuid = Remote.uuid r
|
||||||
staletmpmsg t = unlines $
|
|
||||||
["Some partially transferred data exists in temporary files:"]
|
|
||||||
++ table t ++ dropmsg
|
|
||||||
dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
|
|
||||||
|
|
||||||
|
writeUnusedFile :: [(Int, Key)] -> Annex ()
|
||||||
|
writeUnusedFile l = do
|
||||||
|
g <- Annex.gitRepo
|
||||||
|
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $
|
||||||
|
unlines $ map (\(n, k) -> show n ++ " " ++ show k) l
|
||||||
|
|
||||||
|
table :: [(Int, Key)] -> [String]
|
||||||
table l = [" NUMBER KEY"] ++ map cols l
|
table l = [" NUMBER KEY"] ++ map cols l
|
||||||
|
where
|
||||||
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
|
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
|
||||||
pad n s = s ++ replicate (n - length s) ' '
|
pad n s = s ++ replicate (n - length s) ' '
|
||||||
|
|
||||||
|
@ -75,6 +96,39 @@ number :: Int -> [a] -> [(Int, a)]
|
||||||
number _ [] = []
|
number _ [] = []
|
||||||
number n (x:xs) = (n+1, x):(number (n+1) xs)
|
number n (x:xs) = (n+1, x):(number (n+1) xs)
|
||||||
|
|
||||||
|
staleTmpMsg :: [(Int, Key)] -> String
|
||||||
|
staleTmpMsg t = unlines $
|
||||||
|
["Some partially transferred data exists in temporary files:"]
|
||||||
|
++ table t ++ [dropMsg Nothing]
|
||||||
|
|
||||||
|
unusedMsg :: [(Int, Key)] -> String
|
||||||
|
unusedMsg u = unusedMsg' u
|
||||||
|
["Some annexed data is no longer used by any files in the repository:"]
|
||||||
|
[dropMsg Nothing]
|
||||||
|
|
||||||
|
remoteUnusedMsg :: Remote.Remote Annex -> [(Int, Key)] -> String
|
||||||
|
remoteUnusedMsg r u = unusedMsg' u
|
||||||
|
["Some annexed data on " ++ name ++
|
||||||
|
" is not used by any files in this repository."]
|
||||||
|
[dropMsg $ Just r,
|
||||||
|
"Please be cautious -- are you sure that the remote repository",
|
||||||
|
"does not use this data?"]
|
||||||
|
where
|
||||||
|
name = Remote.name r
|
||||||
|
|
||||||
|
unusedMsg' :: [(Int, Key)] -> [String] -> [String] -> String
|
||||||
|
unusedMsg' u header trailer = unlines $
|
||||||
|
header ++
|
||||||
|
table u ++
|
||||||
|
["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
|
||||||
|
trailer
|
||||||
|
|
||||||
|
dropMsg :: Maybe (Remote.Remote Annex) -> String
|
||||||
|
dropMsg Nothing = dropMsg' ""
|
||||||
|
dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r
|
||||||
|
dropMsg' :: String -> String
|
||||||
|
dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)"
|
||||||
|
|
||||||
{- Finds keys whose content is present, but that do not seem to be used
|
{- Finds keys whose content is present, but that do not seem to be used
|
||||||
- by any files in the git repo, or that are only present as tmp files. -}
|
- by any files in the git repo, or that are only present as tmp files. -}
|
||||||
unusedKeys :: Annex ([Key], [Key])
|
unusedKeys :: Annex ([Key], [Key])
|
||||||
|
@ -93,7 +147,9 @@ unusedKeys = do
|
||||||
referenced <- getKeysReferenced
|
referenced <- getKeysReferenced
|
||||||
tmps <- tmpKeys
|
tmps <- tmpKeys
|
||||||
|
|
||||||
let (unused, staletmp, duptmp) = calcUnusedKeys present referenced tmps
|
let unused = present `exclude` referenced
|
||||||
|
let staletmp = tmps `exclude` present
|
||||||
|
let duptmp = tmps `exclude` staletmp
|
||||||
|
|
||||||
-- Tmp files that are dups of content already present
|
-- Tmp files that are dups of content already present
|
||||||
-- can simply be removed.
|
-- can simply be removed.
|
||||||
|
@ -102,18 +158,16 @@ unusedKeys = do
|
||||||
|
|
||||||
return (unused, staletmp)
|
return (unused, staletmp)
|
||||||
|
|
||||||
calcUnusedKeys :: [Key] -> [Key] -> [Key] -> ([Key], [Key], [Key])
|
{- Finds items in the first, smaller list, that are not
|
||||||
calcUnusedKeys present referenced tmps = (unused, staletmp, duptmp)
|
- present in the second, larger list.
|
||||||
where
|
-
|
||||||
unused = present `exclude` referenced
|
- Constructing a single set, of the list that tends to be
|
||||||
staletmp = tmps `exclude` present
|
- smaller, appears more efficient in both memory and CPU
|
||||||
duptmp = tmps `exclude` staletmp
|
- than constructing and taking the S.difference of two sets. -}
|
||||||
|
exclude :: Ord a => [a] -> [a] -> [a]
|
||||||
-- Constructing a single set, of the list that tends to be
|
|
||||||
-- smaller, appears more efficient in both memory and CPU
|
|
||||||
-- than constructing and taking the S.difference of two sets.
|
|
||||||
exclude [] _ = [] -- optimisation
|
exclude [] _ = [] -- optimisation
|
||||||
exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
|
exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
|
||||||
|
where
|
||||||
remove a b = foldl (flip S.delete) b a
|
remove a b = foldl (flip S.delete) b a
|
||||||
|
|
||||||
{- List of keys referenced by symlinks in the git repo. -}
|
{- List of keys referenced by symlinks in the git repo. -}
|
||||||
|
|
3
debian/changelog
vendored
3
debian/changelog
vendored
|
@ -4,6 +4,9 @@ git-annex (0.20110402) UNRELEASED; urgency=low
|
||||||
some issues with git on OSX with the mixed-case directories.
|
some issues with git on OSX with the mixed-case directories.
|
||||||
No migration is needed; the old mixed case hash directories are still
|
No migration is needed; the old mixed case hash directories are still
|
||||||
read; new information is written to the new directories.
|
read; new information is written to the new directories.
|
||||||
|
* Unused files on remotes, particulary special remotes, can now be
|
||||||
|
identified and dropped, by using "--from remote" with git annex unused
|
||||||
|
and git annex dropunused.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400
|
-- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400
|
||||||
|
|
||||||
|
|
|
@ -155,16 +155,21 @@ Many git-annex commands will stage changes for later `git commit` by you.
|
||||||
|
|
||||||
* unused
|
* unused
|
||||||
|
|
||||||
Checks the annex for data that is not used by any files currently
|
Checks the annex for data that does not correspond to any files currently
|
||||||
in the annex, and prints a numbered list of the data.
|
in the respository, and prints a numbered list of the data.
|
||||||
|
|
||||||
To only show unused temp files, specify --fast
|
To only show unused temp files, specify --fast
|
||||||
|
|
||||||
|
To check data on a remote that does not correspond to any files currently
|
||||||
|
in the local repository, specify --from.
|
||||||
|
|
||||||
* dropunused [number ...]
|
* dropunused [number ...]
|
||||||
|
|
||||||
Drops the data corresponding to the numbers, as listed by the last
|
Drops the data corresponding to the numbers, as listed by the last
|
||||||
`git annex unused`
|
`git annex unused`
|
||||||
|
|
||||||
|
To drop the data from a remote, specify --from.
|
||||||
|
|
||||||
* find [path ...]
|
* find [path ...]
|
||||||
|
|
||||||
Outputs a list of annexed files whose content is currently present.
|
Outputs a list of annexed files whose content is currently present.
|
||||||
|
@ -317,12 +322,15 @@ Many git-annex commands will stage changes for later `git commit` by you.
|
||||||
|
|
||||||
* --from=repository
|
* --from=repository
|
||||||
|
|
||||||
Specifies a repository that content will be retrieved from.
|
Specifies a repository that content will be retrieved from, or that
|
||||||
|
should otherwise be acted on.
|
||||||
|
|
||||||
It should be specified using the name of a configured remote.
|
It should be specified using the name of a configured remote.
|
||||||
|
|
||||||
* --to=repository
|
* --to=repository
|
||||||
|
|
||||||
Specifies a repository that content will be sent to.
|
Specifies a repository that content will be sent to.
|
||||||
|
|
||||||
It should be specified using the name of a configured remote.
|
It should be specified using the name of a configured remote.
|
||||||
|
|
||||||
* --exclude=glob
|
* --exclude=glob
|
||||||
|
|
|
@ -8,3 +8,26 @@ They cannot be used by other git commands though.
|
||||||
|
|
||||||
* [[Amazon_S3]]
|
* [[Amazon_S3]]
|
||||||
* [[directory]]
|
* [[directory]]
|
||||||
|
|
||||||
|
## Unused content on special remotes
|
||||||
|
|
||||||
|
Over time, special remotes can accumulate file content that is no longer
|
||||||
|
referred to by files in git. Normally, unused content in the current
|
||||||
|
repository is found by running `git annex unused`. To detect unused content
|
||||||
|
on special remotes, instead use `git annex unused --from`. Example:
|
||||||
|
|
||||||
|
$ git annex unused --from mys3
|
||||||
|
unused (checking for unused data on mys3...)
|
||||||
|
Some annexed data on mys3 is not used by any files in this repository.
|
||||||
|
NUMBER KEY
|
||||||
|
1 WORM-s3-m1301674316--foo
|
||||||
|
(To see where data was previously used, try: git log --stat -S'KEY')
|
||||||
|
(To remove unwanted data: git-annex dropunused --from mys3 NUMBER)
|
||||||
|
Please be cautious -- are you sure that the remote repository
|
||||||
|
does not use this data?
|
||||||
|
$ git annex dropunused --from mys3 1
|
||||||
|
dropunused 12948 (from mys3...) ok
|
||||||
|
|
||||||
|
Do be cautious when using this; it cannot detect if content in a remote
|
||||||
|
is used by that remote, or is the last copy of data that is used by
|
||||||
|
some *other* remote.
|
||||||
|
|
|
@ -10,7 +10,7 @@ eliminate it to save space.
|
||||||
|
|
||||||
# git annex unused
|
# git annex unused
|
||||||
unused (checking for unused data...)
|
unused (checking for unused data...)
|
||||||
Some annexed data is no longer pointed to by any files in the repository.
|
Some annexed data is no longer used by any files in the repository.
|
||||||
NUMBER KEY
|
NUMBER KEY
|
||||||
1 WORM-s3-m1289672605--file
|
1 WORM-s3-m1289672605--file
|
||||||
2 WORM-s14-m1289672605--file
|
2 WORM-s14-m1289672605--file
|
||||||
|
|
Loading…
Reference in a new issue