add timestamps to unused log files

This will be used in expiring old unused objects. The timestamp is when it
was first noticed it was unused.

Backwards compatability: It supports reading old format unused log files.
The old version of git-annex will ignore lines in log files written by the
new version, so the worst interop problem would be git annex dropunused not
knowing some numbers that git-annex unused reported.
This commit is contained in:
Joey Hess 2014-01-22 15:33:02 -04:00
parent 5f6ebfcd07
commit ae3cd632bd
3 changed files with 58 additions and 15 deletions

View file

@ -92,7 +92,7 @@ check file msg a c = do
l <- a l <- a
let unusedlist = number c l let unusedlist = number c l
unless (null l) $ showLongNote $ msg unusedlist unless (null l) $ showLongNote $ msg unusedlist
writeUnusedLog file unusedlist updateUnusedLog file $ M.fromList unusedlist
return $ c + length l return $ c + length l
number :: Int -> [a] -> [(Int, a)] number :: Int -> [a] -> [(Int, a)]
@ -328,9 +328,9 @@ data UnusedMaps = UnusedMaps
withUnusedMaps :: (UnusedMaps -> Int -> CommandStart) -> CommandSeek withUnusedMaps :: (UnusedMaps -> Int -> CommandStart) -> CommandSeek
withUnusedMaps a params = do withUnusedMaps a params = do
unused <- readUnusedLog "" unused <- readUnusedMap ""
unusedbad <- readUnusedLog "bad" unusedbad <- readUnusedMap "bad"
unusedtmp <- readUnusedLog "tmp" unusedtmp <- readUnusedMap "tmp"
let m = unused `M.union` unusedbad `M.union` unusedtmp let m = unused `M.union` unusedbad `M.union` unusedtmp
let unusedmaps = UnusedMaps unused unusedbad unusedtmp let unusedmaps = UnusedMaps unused unusedbad unusedtmp
seekActions $ return $ map (a unusedmaps) $ seekActions $ return $ map (a unusedmaps) $

View file

@ -1,32 +1,71 @@
{- git-annex unused log file {- git-annex unused log file
- -
- Copyright 2010,2012 Joey Hess <joey@kitenet.net> - This file is stored locally in .git/annex/, not in the git-annex branch.
-
- The format: "int key timestamp"
-
- The int is a short, stable identifier that the user can use to
- refer to this key. (Equivilant to a filename.)
-
- The timestamp indicates when the key was first determined to be unused.
- Older versions of the log omit the timestamp.
-
- Copyright 2010-2014 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
module Logs.Unused ( module Logs.Unused (
UnusedMap, UnusedMap,
writeUnusedLog, updateUnusedLog,
readUnusedLog, readUnusedLog,
readUnusedMap,
unusedKeys, unusedKeys,
) where ) where
import qualified Data.Map as M import qualified Data.Map as M
import Data.Time.Clock.POSIX
import Data.Time
import System.Locale
import Common.Annex import Common.Annex
import Types.Key import Types.Key
import Utility.Tmp import Utility.Tmp
-- everything that is stored in the unused log
type UnusedLog = M.Map Key (Int, Maybe POSIXTime)
-- used to look up unused keys specified by the user
type UnusedMap = M.Map Int Key type UnusedMap = M.Map Int Key
writeUnusedLog :: FilePath -> [(Int, Key)] -> Annex () log2map :: UnusedLog -> UnusedMap
log2map = M.fromList . map (\(k, (i, _t)) -> (i, k)) . M.toList
map2log :: POSIXTime -> UnusedMap -> UnusedLog
map2log t = M.fromList . map (\(i, k) -> (k, (i, Just t))) . M.toList
{- Only keeps keys that are in the new log, but uses any timestamps
- those keys had in the old log. -}
preserveTimestamps :: UnusedLog -> UnusedLog -> UnusedLog
preserveTimestamps oldl newl = M.intersection (M.unionWith oldts oldl newl) newl
where
oldts _old@(_, ts) _new@(int, _) = (int, ts)
updateUnusedLog :: FilePath -> UnusedMap -> Annex ()
updateUnusedLog prefix m = do
oldl <- readUnusedLog prefix
newl <- preserveTimestamps oldl . flip map2log m <$> liftIO getPOSIXTime
writeUnusedLog prefix newl
writeUnusedLog :: FilePath -> UnusedLog -> Annex ()
writeUnusedLog prefix l = do writeUnusedLog prefix l = do
logfile <- fromRepo $ gitAnnexUnusedLog prefix logfile <- fromRepo $ gitAnnexUnusedLog prefix
liftIO $ viaTmp writeFile logfile $ liftIO $ viaTmp writeFile logfile $ unlines $ map format $ M.toList l
unlines $ map (\(n, k) -> show n ++ " " ++ key2file k) l where
format (k, (i, Just t)) = show i ++ " " ++ key2file k ++ " " ++ show t
format (k, (i, Nothing)) = show i ++ " " ++ key2file k
readUnusedLog :: FilePath -> Annex UnusedMap readUnusedLog :: FilePath -> Annex UnusedLog
readUnusedLog prefix = do readUnusedLog prefix = do
f <- fromRepo $ gitAnnexUnusedLog prefix f <- fromRepo $ gitAnnexUnusedLog prefix
ifM (liftIO $ doesFileExist f) ifM (liftIO $ doesFileExist f)
@ -35,11 +74,15 @@ readUnusedLog prefix = do
, return M.empty , return M.empty
) )
where where
parse line = case (readish tag, file2key rest) of parse line = case (readish sint, file2key skey, utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" ts) of
(Just num, Just key) -> Just (num, key) (Just int, Just key, mtimestamp) -> Just (key, (int, mtimestamp))
_ -> Nothing _ -> Nothing
where where
(tag, rest) = separate (== ' ') line (sint, rest) = separate (== ' ') line
(skey, ts) = separate (== ' ') rest
readUnusedMap :: FilePath -> Annex UnusedMap
readUnusedMap = log2map <$$> readUnusedLog
unusedKeys :: Annex [Key] unusedKeys :: Annex [Key]
unusedKeys = M.elems <$> readUnusedLog "" unusedKeys = M.keys <$> readUnusedLog ""

View file

@ -683,7 +683,7 @@ test_unused env = intmpclonerepoInDirect env $ do
where where
checkunused expectedkeys desc = do checkunused expectedkeys desc = do
git_annex env "unused" [] @? "unused failed" git_annex env "unused" [] @? "unused failed"
unusedmap <- annexeval $ Logs.Unused.readUnusedLog "" unusedmap <- annexeval $ Logs.Unused.readUnusedMap ""
let unusedkeys = M.elems unusedmap let unusedkeys = M.elems unusedmap
assertEqual ("unused keys differ " ++ desc) assertEqual ("unused keys differ " ++ desc)
(sort expectedkeys) (sort unusedkeys) (sort expectedkeys) (sort unusedkeys)