add timestamps to unused log files

This will be used in expiring old unused objects. The timestamp is when it
was first noticed it was unused.

Backwards compatability: It supports reading old format unused log files.
The old version of git-annex will ignore lines in log files written by the
new version, so the worst interop problem would be git annex dropunused not
knowing some numbers that git-annex unused reported.
This commit is contained in:
Joey Hess 2014-01-22 15:33:02 -04:00
parent 5f6ebfcd07
commit ae3cd632bd
3 changed files with 58 additions and 15 deletions

View file

@ -92,7 +92,7 @@ check file msg a c = do
l <- a
let unusedlist = number c l
unless (null l) $ showLongNote $ msg unusedlist
writeUnusedLog file unusedlist
updateUnusedLog file $ M.fromList unusedlist
return $ c + length l
number :: Int -> [a] -> [(Int, a)]
@ -328,9 +328,9 @@ data UnusedMaps = UnusedMaps
withUnusedMaps :: (UnusedMaps -> Int -> CommandStart) -> CommandSeek
withUnusedMaps a params = do
unused <- readUnusedLog ""
unusedbad <- readUnusedLog "bad"
unusedtmp <- readUnusedLog "tmp"
unused <- readUnusedMap ""
unusedbad <- readUnusedMap "bad"
unusedtmp <- readUnusedMap "tmp"
let m = unused `M.union` unusedbad `M.union` unusedtmp
let unusedmaps = UnusedMaps unused unusedbad unusedtmp
seekActions $ return $ map (a unusedmaps) $

View file

@ -1,32 +1,71 @@
{- git-annex unused log file
-
- Copyright 2010,2012 Joey Hess <joey@kitenet.net>
- This file is stored locally in .git/annex/, not in the git-annex branch.
-
- The format: "int key timestamp"
-
- The int is a short, stable identifier that the user can use to
- refer to this key. (Equivilant to a filename.)
-
- The timestamp indicates when the key was first determined to be unused.
- Older versions of the log omit the timestamp.
-
- Copyright 2010-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Unused (
UnusedMap,
writeUnusedLog,
updateUnusedLog,
readUnusedLog,
readUnusedMap,
unusedKeys,
) where
import qualified Data.Map as M
import Data.Time.Clock.POSIX
import Data.Time
import System.Locale
import Common.Annex
import Types.Key
import Utility.Tmp
-- everything that is stored in the unused log
type UnusedLog = M.Map Key (Int, Maybe POSIXTime)
-- used to look up unused keys specified by the user
type UnusedMap = M.Map Int Key
writeUnusedLog :: FilePath -> [(Int, Key)] -> Annex ()
log2map :: UnusedLog -> UnusedMap
log2map = M.fromList . map (\(k, (i, _t)) -> (i, k)) . M.toList
map2log :: POSIXTime -> UnusedMap -> UnusedLog
map2log t = M.fromList . map (\(i, k) -> (k, (i, Just t))) . M.toList
{- Only keeps keys that are in the new log, but uses any timestamps
- those keys had in the old log. -}
preserveTimestamps :: UnusedLog -> UnusedLog -> UnusedLog
preserveTimestamps oldl newl = M.intersection (M.unionWith oldts oldl newl) newl
where
oldts _old@(_, ts) _new@(int, _) = (int, ts)
updateUnusedLog :: FilePath -> UnusedMap -> Annex ()
updateUnusedLog prefix m = do
oldl <- readUnusedLog prefix
newl <- preserveTimestamps oldl . flip map2log m <$> liftIO getPOSIXTime
writeUnusedLog prefix newl
writeUnusedLog :: FilePath -> UnusedLog -> Annex ()
writeUnusedLog prefix l = do
logfile <- fromRepo $ gitAnnexUnusedLog prefix
liftIO $ viaTmp writeFile logfile $
unlines $ map (\(n, k) -> show n ++ " " ++ key2file k) l
liftIO $ viaTmp writeFile logfile $ unlines $ map format $ M.toList l
where
format (k, (i, Just t)) = show i ++ " " ++ key2file k ++ " " ++ show t
format (k, (i, Nothing)) = show i ++ " " ++ key2file k
readUnusedLog :: FilePath -> Annex UnusedMap
readUnusedLog :: FilePath -> Annex UnusedLog
readUnusedLog prefix = do
f <- fromRepo $ gitAnnexUnusedLog prefix
ifM (liftIO $ doesFileExist f)
@ -35,11 +74,15 @@ readUnusedLog prefix = do
, return M.empty
)
where
parse line = case (readish tag, file2key rest) of
(Just num, Just key) -> Just (num, key)
parse line = case (readish sint, file2key skey, utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" ts) of
(Just int, Just key, mtimestamp) -> Just (key, (int, mtimestamp))
_ -> Nothing
where
(tag, rest) = separate (== ' ') line
(sint, rest) = separate (== ' ') line
(skey, ts) = separate (== ' ') rest
readUnusedMap :: FilePath -> Annex UnusedMap
readUnusedMap = log2map <$$> readUnusedLog
unusedKeys :: Annex [Key]
unusedKeys = M.elems <$> readUnusedLog ""
unusedKeys = M.keys <$> readUnusedLog ""

View file

@ -683,7 +683,7 @@ test_unused env = intmpclonerepoInDirect env $ do
where
checkunused expectedkeys desc = do
git_annex env "unused" [] @? "unused failed"
unusedmap <- annexeval $ Logs.Unused.readUnusedLog ""
unusedmap <- annexeval $ Logs.Unused.readUnusedMap ""
let unusedkeys = M.elems unusedmap
assertEqual ("unused keys differ " ++ desc)
(sort expectedkeys) (sort unusedkeys)