2012-05-02 18:59:05 +00:00
|
|
|
{- git-annex unused log file
|
|
|
|
-
|
2014-01-22 19:33:02 +00:00
|
|
|
- This file is stored locally in .git/annex/, not in the git-annex branch.
|
|
|
|
-
|
|
|
|
- The format: "int key timestamp"
|
|
|
|
-
|
|
|
|
- The int is a short, stable identifier that the user can use to
|
|
|
|
- refer to this key. (Equivilant to a filename.)
|
|
|
|
-
|
|
|
|
- The timestamp indicates when the key was first determined to be unused.
|
|
|
|
- Older versions of the log omit the timestamp.
|
|
|
|
-
|
2015-01-21 16:50:09 +00:00
|
|
|
- Copyright 2010-2014 Joey Hess <id@joeyh.name>
|
2012-05-02 18:59:05 +00:00
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
module Logs.Unused (
|
2013-07-04 06:36:02 +00:00
|
|
|
UnusedMap,
|
2014-01-22 19:33:02 +00:00
|
|
|
updateUnusedLog,
|
2012-05-02 18:59:05 +00:00
|
|
|
readUnusedLog,
|
2014-01-22 19:33:02 +00:00
|
|
|
readUnusedMap,
|
2014-01-23 19:09:43 +00:00
|
|
|
dateUnusedLog,
|
2013-07-03 19:26:59 +00:00
|
|
|
unusedKeys,
|
2014-01-23 19:09:43 +00:00
|
|
|
unusedKeys',
|
assistant unused file handling
Make sanity checker run git annex unused daily, and queue up transfers
of unused files to any remotes that will have them. The transfer retrying
code works for us here, so eg when a backup disk remote is plugged in,
any transfers to it are done. Once the unused files reach a remote,
they'll be removed locally as unwanted.
If the setup does not cause unused files to go to a remote, they'll pile
up, and the sanity checker detects this using some heuristics that are
pretty good -- 1000 unused files, or 10% of disk used by unused files,
or more disk wasted by unused files than is left free. Once it detects
this, it pops up an alert in the webapp, with a button to take action.
TODO: Webapp UI to configure this, and also the ability to launch an
immediate cleanup of all unused files.
This commit was sponsored by Simon Michael.
2014-01-23 02:48:56 +00:00
|
|
|
setUnusedKeys,
|
2012-05-02 18:59:05 +00:00
|
|
|
) where
|
|
|
|
|
|
|
|
import qualified Data.Map as M
|
2014-01-22 20:35:32 +00:00
|
|
|
import qualified Data.Set as S
|
2014-01-22 19:33:02 +00:00
|
|
|
import Data.Time.Clock.POSIX
|
|
|
|
import Data.Time
|
2012-05-02 18:59:05 +00:00
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2014-01-22 20:35:32 +00:00
|
|
|
import qualified Annex
|
2015-05-10 18:45:55 +00:00
|
|
|
import Logs.TimeStamp
|
2018-01-02 21:17:10 +00:00
|
|
|
import Logs.File
|
2012-05-02 18:59:05 +00:00
|
|
|
|
2014-01-22 19:33:02 +00:00
|
|
|
-- everything that is stored in the unused log
|
|
|
|
type UnusedLog = M.Map Key (Int, Maybe POSIXTime)
|
|
|
|
|
|
|
|
-- used to look up unused keys specified by the user
|
2013-07-03 19:26:59 +00:00
|
|
|
type UnusedMap = M.Map Int Key
|
|
|
|
|
2014-01-22 19:33:02 +00:00
|
|
|
log2map :: UnusedLog -> UnusedMap
|
|
|
|
log2map = M.fromList . map (\(k, (i, _t)) -> (i, k)) . M.toList
|
|
|
|
|
|
|
|
map2log :: POSIXTime -> UnusedMap -> UnusedLog
|
|
|
|
map2log t = M.fromList . map (\(i, k) -> (k, (i, Just t))) . M.toList
|
|
|
|
|
|
|
|
{- Only keeps keys that are in the new log, but uses any timestamps
|
|
|
|
- those keys had in the old log. -}
|
|
|
|
preserveTimestamps :: UnusedLog -> UnusedLog -> UnusedLog
|
|
|
|
preserveTimestamps oldl newl = M.intersection (M.unionWith oldts oldl newl) newl
|
|
|
|
where
|
|
|
|
oldts _old@(_, ts) _new@(int, _) = (int, ts)
|
|
|
|
|
|
|
|
updateUnusedLog :: FilePath -> UnusedMap -> Annex ()
|
|
|
|
updateUnusedLog prefix m = do
|
|
|
|
oldl <- readUnusedLog prefix
|
|
|
|
newl <- preserveTimestamps oldl . flip map2log m <$> liftIO getPOSIXTime
|
|
|
|
writeUnusedLog prefix newl
|
|
|
|
|
|
|
|
writeUnusedLog :: FilePath -> UnusedLog -> Annex ()
|
2012-05-02 18:59:05 +00:00
|
|
|
writeUnusedLog prefix l = do
|
|
|
|
logfile <- fromRepo $ gitAnnexUnusedLog prefix
|
2018-01-02 21:17:10 +00:00
|
|
|
writeLogFile logfile $ unlines $ map format $ M.toList l
|
2014-01-22 19:33:02 +00:00
|
|
|
where
|
|
|
|
format (k, (i, Just t)) = show i ++ " " ++ key2file k ++ " " ++ show t
|
|
|
|
format (k, (i, Nothing)) = show i ++ " " ++ key2file k
|
2012-05-02 18:59:05 +00:00
|
|
|
|
2014-01-22 19:33:02 +00:00
|
|
|
readUnusedLog :: FilePath -> Annex UnusedLog
|
2012-05-02 18:59:05 +00:00
|
|
|
readUnusedLog prefix = do
|
|
|
|
f <- fromRepo $ gitAnnexUnusedLog prefix
|
|
|
|
ifM (liftIO $ doesFileExist f)
|
2013-04-03 07:52:41 +00:00
|
|
|
( M.fromList . mapMaybe parse . lines
|
2016-12-24 18:46:31 +00:00
|
|
|
<$> liftIO (readFileStrict f)
|
2012-05-02 18:59:05 +00:00
|
|
|
, return M.empty
|
|
|
|
)
|
2012-11-11 04:51:07 +00:00
|
|
|
where
|
2015-05-10 18:45:55 +00:00
|
|
|
parse line = case (readish sint, file2key skey, parsePOSIXTime ts) of
|
2014-01-22 19:33:02 +00:00
|
|
|
(Just int, Just key, mtimestamp) -> Just (key, (int, mtimestamp))
|
2012-11-11 04:51:07 +00:00
|
|
|
_ -> Nothing
|
|
|
|
where
|
2014-01-22 19:33:02 +00:00
|
|
|
(sint, rest) = separate (== ' ') line
|
2014-02-08 19:27:11 +00:00
|
|
|
(rts, rskey) = separate (== ' ') (reverse rest)
|
|
|
|
skey = reverse rskey
|
|
|
|
ts = reverse rts
|
2014-01-22 19:33:02 +00:00
|
|
|
|
|
|
|
readUnusedMap :: FilePath -> Annex UnusedMap
|
|
|
|
readUnusedMap = log2map <$$> readUnusedLog
|
2012-05-02 18:59:05 +00:00
|
|
|
|
2014-01-23 19:09:43 +00:00
|
|
|
dateUnusedLog :: FilePath -> Annex (Maybe UTCTime)
|
|
|
|
dateUnusedLog prefix = do
|
|
|
|
f <- fromRepo $ gitAnnexUnusedLog prefix
|
|
|
|
liftIO $ catchMaybeIO $ getModificationTime f
|
|
|
|
|
2014-01-22 20:35:32 +00:00
|
|
|
{- Set of unused keys. This is cached for speed. -}
|
|
|
|
unusedKeys :: Annex (S.Set Key)
|
|
|
|
unusedKeys = maybe (setUnusedKeys =<< unusedKeys') return
|
|
|
|
=<< Annex.getState Annex.unusedkeys
|
|
|
|
|
|
|
|
unusedKeys' :: Annex [Key]
|
|
|
|
unusedKeys' = M.keys <$> readUnusedLog ""
|
|
|
|
|
|
|
|
setUnusedKeys :: [Key] -> Annex (S.Set Key)
|
|
|
|
setUnusedKeys ks = do
|
|
|
|
let v = S.fromList ks
|
|
|
|
Annex.changeState $ \s -> s { Annex.unusedkeys = Just v }
|
|
|
|
return v
|