Improved temp file handling
* Improved temp file handling. Transfers of content can now be resumed from temp files later; the resume does not have to be the immediate next git-annex run. * unused: Include partially transferred content in the list.
This commit is contained in:
parent
04fe906ac6
commit
e6da7eb177
8 changed files with 87 additions and 40 deletions
|
@ -7,9 +7,12 @@
|
|||
|
||||
module Command.Unused where
|
||||
|
||||
import Control.Monad (filterM, unless)
|
||||
import Control.Monad.State (liftIO)
|
||||
import qualified Data.Map as M
|
||||
import Data.Maybe
|
||||
import System.FilePath
|
||||
import System.Directory
|
||||
|
||||
import Command
|
||||
import Types
|
||||
|
@ -41,49 +44,71 @@ perform = do
|
|||
checkUnused :: Annex Bool
|
||||
checkUnused = do
|
||||
showNote "checking for unused data..."
|
||||
unused <- unusedKeys
|
||||
let list = number 1 unused
|
||||
(unused, staletmp) <- unusedKeys
|
||||
let unusedlist = number 0 unused
|
||||
let staletmplist = number (length unused) staletmp
|
||||
let list = unusedlist ++ staletmplist
|
||||
g <- Annex.gitRepo
|
||||
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $
|
||||
map (\(n, k) -> show n ++ " " ++ show k) list
|
||||
if null unused
|
||||
then return True
|
||||
else do
|
||||
showLongNote $ w list
|
||||
return False
|
||||
unless (null unused) $
|
||||
showLongNote $ unusedmsg unusedlist
|
||||
unless (null staletmp) $
|
||||
showLongNote $ staletmpmsg staletmplist
|
||||
unless (null list) $
|
||||
showLongNote $ "\n"
|
||||
return $ null list
|
||||
|
||||
where
|
||||
w u = unlines $
|
||||
["Some annexed data is no longer pointed to by any files in the repository:",
|
||||
" NUMBER KEY"]
|
||||
++ map cols u ++
|
||||
["(To see where data was previously used, try: git log --stat -S'KEY')",
|
||||
"(To remove unwanted data: git-annex dropunused NUMBER)",
|
||||
""]
|
||||
unusedmsg u = unlines $
|
||||
["Some annexed data is no longer pointed to by any files in the repository:"]
|
||||
++ table u ++
|
||||
["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
|
||||
dropmsg
|
||||
staletmpmsg t = unlines $
|
||||
["Some partially transferred data exists in temporary files:"]
|
||||
++ table t ++ dropmsg
|
||||
dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
|
||||
|
||||
table l = [" NUMBER KEY"] ++ map cols l
|
||||
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
|
||||
pad n s = s ++ replicate (n - length s) ' '
|
||||
|
||||
number :: Integer -> [a] -> [(Integer, a)]
|
||||
number :: Int -> [a] -> [(Int, a)]
|
||||
number _ [] = []
|
||||
number n (x:xs) = (n, x):(number (n+1) xs)
|
||||
number n (x:xs) = (n+1, x):(number (n+1) xs)
|
||||
|
||||
{- Finds keys whose content is present, but that do not seem to be used
|
||||
- by any files in the git repo. -}
|
||||
unusedKeys :: Annex [Key]
|
||||
- by any files in the git repo, or that are only present as tmp files. -}
|
||||
unusedKeys :: Annex ([Key], [Key])
|
||||
unusedKeys = do
|
||||
g <- Annex.gitRepo
|
||||
present <- getKeysPresent
|
||||
referenced <- getKeysReferenced
|
||||
|
||||
-- Constructing a single map, of the set that tends to be smaller,
|
||||
-- appears more efficient in both memory and CPU than constructing
|
||||
-- and taking the M.difference of two maps.
|
||||
let present_m = existsMap present
|
||||
let unused_m = remove referenced present_m
|
||||
return $ M.keys unused_m
|
||||
where
|
||||
remove a b = foldl (flip M.delete) b a
|
||||
let unused = present `exclude` referenced
|
||||
|
||||
existsMap :: Ord k => [k] -> M.Map k Int
|
||||
existsMap l = M.fromList $ map (\k -> (k, 1)) l
|
||||
-- Some tmp files may be dups copies of content that is fully present.
|
||||
-- Simply delete those, while including the keys for the rest of
|
||||
-- the temp files in the returned list for the user to deal with.
|
||||
tmps <- tmpKeys
|
||||
let staletmp = tmps `exclude` present
|
||||
let duptmp = tmps `exclude` staletmp
|
||||
_ <- liftIO $ mapM (\t -> removeFile $ gitAnnexTmpLocation g t) duptmp
|
||||
|
||||
return (unused, staletmp)
|
||||
|
||||
where
|
||||
-- Constructing a single map, of the set that tends to be
|
||||
-- smaller, appears more efficient in both memory and CPU
|
||||
-- than constructing and taking the M.difference of two maps.
|
||||
exclude [] _ = [] -- optimisation
|
||||
exclude smaller larger = M.keys $ remove larger $ existsMap smaller
|
||||
|
||||
existsMap :: Ord k => [k] -> M.Map k Int
|
||||
existsMap l = M.fromList $ map (\k -> (k, 1)) l
|
||||
|
||||
remove a b = foldl (flip M.delete) b a
|
||||
|
||||
{- List of keys referenced by symlinks in the git repo. -}
|
||||
getKeysReferenced :: Annex [Key]
|
||||
|
@ -92,3 +117,17 @@ getKeysReferenced = do
|
|||
files <- liftIO $ Git.inRepo g [Git.workTree g]
|
||||
keypairs <- mapM Backend.lookupFile files
|
||||
return $ map fst $ catMaybes keypairs
|
||||
|
||||
{- List of keys that have temp files in the git repo. -}
|
||||
tmpKeys :: Annex [Key]
|
||||
tmpKeys = do
|
||||
g <- Annex.gitRepo
|
||||
let tmp = gitAnnexTmpDir g
|
||||
exists <- liftIO $ doesDirectoryExist tmp
|
||||
if (not exists)
|
||||
then return []
|
||||
else do
|
||||
contents <- liftIO $ getDirectoryContents tmp
|
||||
files <- liftIO $ filterM doesFileExist $
|
||||
map (tmp </>) contents
|
||||
return $ map (fileKey . takeFileName) files
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue