git-annex/Logs/Location.hs
Joey Hess 574514545c
git-annex log --sizesof
This can take a lot of memory. I decided to violate the usual rule in
git-annex that it operate in constant memory no matter how many annexed
objects. In this case, it would be hard to be fast without using a big
map of the location logs. The main difficulty here is that there can be
many git-annex branches and it needs to display a consistent view at a
point in time, which means merging information from multiple git-annex
branches.

I have not checked if there are any laziness leaks in this code. It
takes 1 gb to run in my big repo, which is around what I estimated
before writing it.

2 options that are documented are not yet implemented.

Small bug: With eg --when=1h, it will display at 12:00 then 1:10 if the
next change after 12:59 is then. Then it waits until after 2:10 to
display the next change. It ought to wait until after 2:00.

Sponsored-by: Brock Spratlen on Patreon
2023-11-10 17:26:10 -04:00

217 lines
6.4 KiB
Haskell

{-# LANGUAGE BangPatterns #-}
{- git-annex location log
-
- git-annex keeps track of which repositories have the contents of annexed
- files.
-
- Repositories record their UUID and the date when they --get or --drop
- a value.
-
- Copyright 2010-2023 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
module Logs.Location (
LogStatus(..),
logStatus,
logStatusAfter,
logChange,
loggedLocations,
loggedLocationsHistorical,
loggedLocationsRef,
parseLoggedLocations,
isKnownKey,
checkDead,
setDead,
Unchecked,
finishCheck,
loggedKeys,
loggedKeysFor,
loggedKeysFor',
overLocationLogs,
overLocationLogs',
) where
import Annex.Common
import qualified Annex.Branch
import Logs
import Logs.Presence
import Annex.UUID
import Annex.CatFile
import Annex.VectorClock
import Git.Types (RefDate, Ref)
import qualified Annex
import Data.Time.Clock
import qualified Data.ByteString.Lazy as L
{- Log a change in the presence of a key's value in current repository. -}
logStatus :: Key -> LogStatus -> Annex ()
logStatus key s = do
u <- getUUID
logChange key u s
{- Run an action that gets the content of a key, and update the log
- when it succeeds. -}
logStatusAfter :: Key -> Annex Bool -> Annex Bool
logStatusAfter key a = ifM a
( do
logStatus key InfoPresent
return True
, return False
)
{- Log a change in the presence of a key's value in a repository. -}
logChange :: Key -> UUID -> LogStatus -> Annex ()
logChange key u@(UUID _) s = do
config <- Annex.getGitConfig
maybeAddLog
(Annex.Branch.RegardingUUID [u])
(locationLogFile config key)
s
(LogInfo (fromUUID u))
logChange _ NoUUID _ = noop
{- Returns a list of repository UUIDs that, according to the log, have
- the value of a key. -}
loggedLocations :: Key -> Annex [UUID]
loggedLocations = getLoggedLocations currentLogInfo
{- Gets the location log on a particular date. -}
loggedLocationsHistorical :: RefDate -> Key -> Annex [UUID]
loggedLocationsHistorical = getLoggedLocations . historicalLogInfo
{- Gets the locations contained in a git ref. -}
loggedLocationsRef :: Ref -> Annex [UUID]
loggedLocationsRef ref = map (toUUID . fromLogInfo) . getLog <$> catObject ref
{- Parses the content of a log file and gets the locations in it. -}
parseLoggedLocations :: L.ByteString -> [UUID]
parseLoggedLocations l = map (toUUID . fromLogInfo . info)
(filterPresent (parseLog l))
getLoggedLocations :: (RawFilePath -> Annex [LogInfo]) -> Key -> Annex [UUID]
getLoggedLocations getter key = do
config <- Annex.getGitConfig
map (toUUID . fromLogInfo) <$> getter (locationLogFile config key)
{- Is there a location log for the key? True even for keys with no
- remaining locations. -}
isKnownKey :: Key -> Annex Bool
isKnownKey key = do
config <- Annex.getGitConfig
not . null <$> readLog (locationLogFile config key)
{- For a key to be dead, all locations that have location status for the key
- must have InfoDead set. -}
checkDead :: Key -> Annex Bool
checkDead key = do
config <- Annex.getGitConfig
ls <- compactLog <$> readLog (locationLogFile config key)
return $! all (\l -> status l == InfoDead) ls
{- Updates the log to say that a key is dead.
-
- Changes all logged lines for the key, in any location, that are
- currently InfoMissing, to be InfoDead.
-
- The vector clock in the log is updated minimally, so that any
- other location log changes are guaranteed to overrule this.
-}
setDead :: Key -> Annex ()
setDead key = do
config <- Annex.getGitConfig
let logfile = locationLogFile config key
ls <- compactLog <$> readLog logfile
mapM_ (go logfile) (filter (\l -> status l == InfoMissing) ls)
where
go logfile l =
let u = toUUID (fromLogInfo (info l))
c = case date l of
VectorClock v -> CandidateVectorClock $
v + realToFrac (picosecondsToDiffTime 1)
Unknown -> CandidateVectorClock 0
in addLog' (Annex.Branch.RegardingUUID [u]) logfile InfoDead
(info l) c
data Unchecked a = Unchecked (Annex (Maybe a))
finishCheck :: Unchecked a -> Annex (Maybe a)
finishCheck (Unchecked a) = a
{- Finds all keys that have location log information.
- (There may be duplicate keys in the list.)
-
- Keys that have been marked as dead are not included.
-}
loggedKeys :: Annex (Maybe ([Unchecked Key], IO Bool))
loggedKeys = loggedKeys' (not <$$> checkDead)
loggedKeys' :: (Key -> Annex Bool) -> Annex (Maybe ([Unchecked Key], IO Bool))
loggedKeys' check = do
config <- Annex.getGitConfig
Annex.Branch.files >>= \case
Nothing -> return Nothing
Just (bfs, cleanup) -> do
let l = mapMaybe (defercheck <$$> locationLogFileKey config) bfs
return (Just (l, cleanup))
where
defercheck k = Unchecked $ ifM (check k)
( return (Just k)
, return Nothing
)
{- Finds all keys that have location log information indicating
- they are present in the specified repository.
-
- This does not stream well; use loggedKeysFor' for lazy streaming.
-}
loggedKeysFor :: UUID -> Annex (Maybe [Key])
loggedKeysFor u = loggedKeysFor' u >>= \case
Nothing -> return Nothing
Just (l, cleanup) -> do
l' <- catMaybes <$> mapM finishCheck l
liftIO $ void cleanup
return (Just l')
loggedKeysFor' :: UUID -> Annex (Maybe ([Unchecked Key], IO Bool))
loggedKeysFor' u = loggedKeys' isthere
where
isthere k = do
us <- loggedLocations k
let !there = u `elem` us
return there
{- This is much faster than loggedKeys. -}
overLocationLogs :: v -> (Key -> [UUID] -> v -> Annex v) -> Annex v
overLocationLogs v = overLocationLogs' v (flip const)
overLocationLogs'
:: v
-> (Annex (Maybe (Key, RawFilePath, Maybe L.ByteString)) -> Annex v -> Annex v)
-> (Key -> [UUID] -> v -> Annex v)
-> Annex v
overLocationLogs' iv discarder keyaction = do
config <- Annex.getGitConfig
let getk = locationLogFileKey config
let go v reader = reader >>= \case
Just (k, f, content) -> discarder reader $ do
-- precache to make checkDead fast, and also to
-- make any accesses done in keyaction fast.
maybe noop (Annex.Branch.precache f) content
ifM (checkDead k)
( go v reader
, do
!v' <- keyaction k (maybe [] parseLoggedLocations content) v
go v' reader
)
Nothing -> return v
Annex.Branch.overBranchFileContents getk (go iv) >>= \case
Just r -> return r
Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"