Improved temp file handling
* Improved temp file handling. Transfers of content can now be resumed from temp files later; the resume does not have to be the immediate next git-annex run. * unused: Include partially transferred content in the list.
This commit is contained in:
parent
04fe906ac6
commit
e6da7eb177
8 changed files with 87 additions and 40 deletions
10
CmdLine.hs
10
CmdLine.hs
|
@ -105,13 +105,3 @@ shutdown errnum = do
|
||||||
unless (q == GitQueue.empty) $ do
|
unless (q == GitQueue.empty) $ do
|
||||||
showSideAction "Recording state in git..."
|
showSideAction "Recording state in git..."
|
||||||
Annex.queueRun
|
Annex.queueRun
|
||||||
|
|
||||||
-- If nothing failed, clean up any files left in the temp directory,
|
|
||||||
-- but leave the directory itself. If something failed, temp files
|
|
||||||
-- are left behind to allow resuming on re-run.
|
|
||||||
when (errnum == 0) $ do
|
|
||||||
g <- Annex.gitRepo
|
|
||||||
let tmp = gitAnnexTmpDir g
|
|
||||||
exists <- liftIO $ doesDirectoryExist tmp
|
|
||||||
when exists $ liftIO $ removeDirectoryRecursive tmp
|
|
||||||
liftIO $ createDirectoryIfMissing True tmp
|
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
module Command.DropUnused where
|
module Command.DropUnused where
|
||||||
|
|
||||||
|
import Control.Monad (when)
|
||||||
import Control.Monad.State (liftIO)
|
import Control.Monad.State (liftIO)
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import System.Directory
|
import System.Directory
|
||||||
|
@ -33,8 +34,14 @@ start s = do
|
||||||
case M.lookup s m of
|
case M.lookup s m of
|
||||||
Nothing -> return Nothing
|
Nothing -> return Nothing
|
||||||
Just key -> do
|
Just key -> do
|
||||||
|
g <- Annex.gitRepo
|
||||||
showStart "dropunused" s
|
showStart "dropunused" s
|
||||||
backend <- keyBackend key
|
backend <- keyBackend key
|
||||||
|
-- drop both content in the backend and any tmp
|
||||||
|
-- file for the key
|
||||||
|
let tmp = gitAnnexTmpLocation g key
|
||||||
|
tmp_exists <- liftIO $ doesFileExist tmp
|
||||||
|
when tmp_exists $ liftIO $ removeFile tmp
|
||||||
return $ Just $ Command.Drop.perform key backend (Just 0)
|
return $ Just $ Command.Drop.perform key backend (Just 0)
|
||||||
|
|
||||||
readUnusedLog :: Annex (M.Map String Key)
|
readUnusedLog :: Annex (M.Map String Key)
|
||||||
|
|
|
@ -7,9 +7,12 @@
|
||||||
|
|
||||||
module Command.Unused where
|
module Command.Unused where
|
||||||
|
|
||||||
|
import Control.Monad (filterM, unless)
|
||||||
import Control.Monad.State (liftIO)
|
import Control.Monad.State (liftIO)
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
|
import System.FilePath
|
||||||
|
import System.Directory
|
||||||
|
|
||||||
import Command
|
import Command
|
||||||
import Types
|
import Types
|
||||||
|
@ -41,49 +44,71 @@ perform = do
|
||||||
checkUnused :: Annex Bool
|
checkUnused :: Annex Bool
|
||||||
checkUnused = do
|
checkUnused = do
|
||||||
showNote "checking for unused data..."
|
showNote "checking for unused data..."
|
||||||
unused <- unusedKeys
|
(unused, staletmp) <- unusedKeys
|
||||||
let list = number 1 unused
|
let unusedlist = number 0 unused
|
||||||
|
let staletmplist = number (length unused) staletmp
|
||||||
|
let list = unusedlist ++ staletmplist
|
||||||
g <- Annex.gitRepo
|
g <- Annex.gitRepo
|
||||||
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $
|
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $
|
||||||
map (\(n, k) -> show n ++ " " ++ show k) list
|
map (\(n, k) -> show n ++ " " ++ show k) list
|
||||||
if null unused
|
unless (null unused) $
|
||||||
then return True
|
showLongNote $ unusedmsg unusedlist
|
||||||
else do
|
unless (null staletmp) $
|
||||||
showLongNote $ w list
|
showLongNote $ staletmpmsg staletmplist
|
||||||
return False
|
unless (null list) $
|
||||||
|
showLongNote $ "\n"
|
||||||
|
return $ null list
|
||||||
|
|
||||||
where
|
where
|
||||||
w u = unlines $
|
unusedmsg u = unlines $
|
||||||
["Some annexed data is no longer pointed to by any files in the repository:",
|
["Some annexed data is no longer pointed to by any files in the repository:"]
|
||||||
" NUMBER KEY"]
|
++ table u ++
|
||||||
++ map cols u ++
|
["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
|
||||||
["(To see where data was previously used, try: git log --stat -S'KEY')",
|
dropmsg
|
||||||
"(To remove unwanted data: git-annex dropunused NUMBER)",
|
staletmpmsg t = unlines $
|
||||||
""]
|
["Some partially transferred data exists in temporary files:"]
|
||||||
|
++ table t ++ dropmsg
|
||||||
|
dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
|
||||||
|
|
||||||
|
table l = [" NUMBER KEY"] ++ map cols l
|
||||||
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
|
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
|
||||||
pad n s = s ++ replicate (n - length s) ' '
|
pad n s = s ++ replicate (n - length s) ' '
|
||||||
|
|
||||||
number :: Integer -> [a] -> [(Integer, a)]
|
number :: Int -> [a] -> [(Int, a)]
|
||||||
number _ [] = []
|
number _ [] = []
|
||||||
number n (x:xs) = (n, x):(number (n+1) xs)
|
number n (x:xs) = (n+1, x):(number (n+1) xs)
|
||||||
|
|
||||||
{- Finds keys whose content is present, but that do not seem to be used
|
{- Finds keys whose content is present, but that do not seem to be used
|
||||||
- by any files in the git repo. -}
|
- by any files in the git repo, or that are only present as tmp files. -}
|
||||||
unusedKeys :: Annex [Key]
|
unusedKeys :: Annex ([Key], [Key])
|
||||||
unusedKeys = do
|
unusedKeys = do
|
||||||
|
g <- Annex.gitRepo
|
||||||
present <- getKeysPresent
|
present <- getKeysPresent
|
||||||
referenced <- getKeysReferenced
|
referenced <- getKeysReferenced
|
||||||
|
|
||||||
-- Constructing a single map, of the set that tends to be smaller,
|
let unused = present `exclude` referenced
|
||||||
-- appears more efficient in both memory and CPU than constructing
|
|
||||||
-- and taking the M.difference of two maps.
|
|
||||||
let present_m = existsMap present
|
|
||||||
let unused_m = remove referenced present_m
|
|
||||||
return $ M.keys unused_m
|
|
||||||
where
|
|
||||||
remove a b = foldl (flip M.delete) b a
|
|
||||||
|
|
||||||
existsMap :: Ord k => [k] -> M.Map k Int
|
-- Some tmp files may be dups copies of content that is fully present.
|
||||||
existsMap l = M.fromList $ map (\k -> (k, 1)) l
|
-- Simply delete those, while including the keys for the rest of
|
||||||
|
-- the temp files in the returned list for the user to deal with.
|
||||||
|
tmps <- tmpKeys
|
||||||
|
let staletmp = tmps `exclude` present
|
||||||
|
let duptmp = tmps `exclude` staletmp
|
||||||
|
_ <- liftIO $ mapM (\t -> removeFile $ gitAnnexTmpLocation g t) duptmp
|
||||||
|
|
||||||
|
return (unused, staletmp)
|
||||||
|
|
||||||
|
where
|
||||||
|
-- Constructing a single map, of the set that tends to be
|
||||||
|
-- smaller, appears more efficient in both memory and CPU
|
||||||
|
-- than constructing and taking the M.difference of two maps.
|
||||||
|
exclude [] _ = [] -- optimisation
|
||||||
|
exclude smaller larger = M.keys $ remove larger $ existsMap smaller
|
||||||
|
|
||||||
|
existsMap :: Ord k => [k] -> M.Map k Int
|
||||||
|
existsMap l = M.fromList $ map (\k -> (k, 1)) l
|
||||||
|
|
||||||
|
remove a b = foldl (flip M.delete) b a
|
||||||
|
|
||||||
{- List of keys referenced by symlinks in the git repo. -}
|
{- List of keys referenced by symlinks in the git repo. -}
|
||||||
getKeysReferenced :: Annex [Key]
|
getKeysReferenced :: Annex [Key]
|
||||||
|
@ -92,3 +117,17 @@ getKeysReferenced = do
|
||||||
files <- liftIO $ Git.inRepo g [Git.workTree g]
|
files <- liftIO $ Git.inRepo g [Git.workTree g]
|
||||||
keypairs <- mapM Backend.lookupFile files
|
keypairs <- mapM Backend.lookupFile files
|
||||||
return $ map fst $ catMaybes keypairs
|
return $ map fst $ catMaybes keypairs
|
||||||
|
|
||||||
|
{- List of keys that have temp files in the git repo. -}
|
||||||
|
tmpKeys :: Annex [Key]
|
||||||
|
tmpKeys = do
|
||||||
|
g <- Annex.gitRepo
|
||||||
|
let tmp = gitAnnexTmpDir g
|
||||||
|
exists <- liftIO $ doesDirectoryExist tmp
|
||||||
|
if (not exists)
|
||||||
|
then return []
|
||||||
|
else do
|
||||||
|
contents <- liftIO $ getDirectoryContents tmp
|
||||||
|
files <- liftIO $ filterM doesFileExist $
|
||||||
|
map (tmp </>) contents
|
||||||
|
return $ map (fileKey . takeFileName) files
|
||||||
|
|
|
@ -67,7 +67,7 @@ logStatus key status = do
|
||||||
getViaTmp :: Key -> (FilePath -> Annex Bool) -> Annex Bool
|
getViaTmp :: Key -> (FilePath -> Annex Bool) -> Annex Bool
|
||||||
getViaTmp key action = do
|
getViaTmp key action = do
|
||||||
g <- Annex.gitRepo
|
g <- Annex.gitRepo
|
||||||
let tmp = gitAnnexTmpDir g </> keyFile key
|
let tmp = gitAnnexTmpLocation g key
|
||||||
liftIO $ createDirectoryIfMissing True (parentDir tmp)
|
liftIO $ createDirectoryIfMissing True (parentDir tmp)
|
||||||
success <- action tmp
|
success <- action tmp
|
||||||
if success
|
if success
|
||||||
|
|
|
@ -15,6 +15,7 @@ module Locations (
|
||||||
gitAnnexDir,
|
gitAnnexDir,
|
||||||
gitAnnexObjectDir,
|
gitAnnexObjectDir,
|
||||||
gitAnnexTmpDir,
|
gitAnnexTmpDir,
|
||||||
|
gitAnnexTmpLocation,
|
||||||
gitAnnexBadDir,
|
gitAnnexBadDir,
|
||||||
gitAnnexUnusedLog,
|
gitAnnexUnusedLog,
|
||||||
isLinkToAnnex,
|
isLinkToAnnex,
|
||||||
|
@ -83,6 +84,10 @@ gitAnnexObjectDir r = addTrailingPathSeparator $ Git.workTree r </> objectDir
|
||||||
gitAnnexTmpDir :: Git.Repo -> FilePath
|
gitAnnexTmpDir :: Git.Repo -> FilePath
|
||||||
gitAnnexTmpDir r = addTrailingPathSeparator $ gitAnnexDir r </> "tmp"
|
gitAnnexTmpDir r = addTrailingPathSeparator $ gitAnnexDir r </> "tmp"
|
||||||
|
|
||||||
|
{- The temp file to use for a given key. -}
|
||||||
|
gitAnnexTmpLocation :: Git.Repo -> Key -> FilePath
|
||||||
|
gitAnnexTmpLocation r key = gitAnnexTmpDir r </> keyFile key
|
||||||
|
|
||||||
{- .git-annex/bad/ is used for bad files found during fsck -}
|
{- .git-annex/bad/ is used for bad files found during fsck -}
|
||||||
gitAnnexBadDir :: Git.Repo -> FilePath
|
gitAnnexBadDir :: Git.Repo -> FilePath
|
||||||
gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad"
|
gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad"
|
||||||
|
|
4
debian/changelog
vendored
4
debian/changelog
vendored
|
@ -8,6 +8,10 @@ git-annex (0.19) UNRELEASED; urgency=low
|
||||||
* fsck, drop: Take untrusted repositories into account.
|
* fsck, drop: Take untrusted repositories into account.
|
||||||
* Bugfix: Files were copied from trusted remotes first even if their
|
* Bugfix: Files were copied from trusted remotes first even if their
|
||||||
annex.cost was higher than other remotes.
|
annex.cost was higher than other remotes.
|
||||||
|
* Improved temp file handling. Transfers of content can now be resumed
|
||||||
|
from temp files later; the resume does not have to be the immediate
|
||||||
|
next git-annex run.
|
||||||
|
* unused: Include partially transferred content in the list.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Wed, 19 Jan 2011 18:07:51 -0400
|
-- Joey Hess <joeyh@debian.org> Wed, 19 Jan 2011 18:07:51 -0400
|
||||||
|
|
||||||
|
|
|
@ -9,3 +9,5 @@ This presents 2 problems:
|
||||||
finished.
|
finished.
|
||||||
|
|
||||||
--[[Joey]]
|
--[[Joey]]
|
||||||
|
|
||||||
|
[[done]]
|
||||||
|
|
|
@ -132,7 +132,7 @@ Many git-annex commands will stage changes for later `git commit` by you.
|
||||||
* unused
|
* unused
|
||||||
|
|
||||||
Checks the annex for data that is not used by any files currently
|
Checks the annex for data that is not used by any files currently
|
||||||
in the annex, and prints a numbered list of the data.
|
in the annex, and prints a numbered list of the data.
|
||||||
|
|
||||||
* dropunused [number ...]
|
* dropunused [number ...]
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue