diff --git a/Annex.hs b/Annex.hs index 4208e5c741..9e4d0a45c3 100644 --- a/Annex.hs +++ b/Annex.hs @@ -133,7 +133,7 @@ data AnnexRead = AnnexRead , forcenumcopies :: Maybe NumCopies , forcemincopies :: Maybe MinCopies , forcebackend :: Maybe String - , reposizes :: MVar (Maybe (M.Map UUID RepoSize)) + , reposizes :: MVar (Maybe (M.Map UUID (RepoSize, SizeOffset))) , rebalance :: Bool , useragent :: Maybe String , desktopnotify :: DesktopNotify diff --git a/Annex/RepoSize.hs b/Annex/RepoSize.hs index b9a6ffe95c..084c2c3efd 100644 --- a/Annex/RepoSize.hs +++ b/Annex/RepoSize.hs @@ -13,10 +13,10 @@ module Annex.RepoSize ( ) where import Annex.Common -import Annex.RepoSize.LiveUpdate import qualified Annex import Annex.Branch (UnmergedBranches(..), getBranch) import qualified Database.RepoSize as Db +import Annex.Journal import Logs import Logs.Location import Logs.UUID @@ -36,7 +36,10 @@ import qualified Data.Set as S - was called. It does not update while git-annex is running. -} getRepoSizes :: Bool -> Annex (M.Map UUID RepoSize) -getRepoSizes quiet = do +getRepoSizes quiet = M.map fst <$> getRepoSizes' quiet + +getRepoSizes' :: Bool -> Annex (M.Map UUID (RepoSize, SizeOffset)) +getRepoSizes' quiet = do rsv <- Annex.getRead Annex.reposizes liftIO (takeMVar rsv) >>= \case Just sizemap -> do @@ -47,22 +50,24 @@ getRepoSizes quiet = do {- Like getRepoSizes, but with live updates. -} getLiveRepoSizes :: Bool -> Annex (M.Map UUID RepoSize) getLiveRepoSizes quiet = do - h <- Db.getRepoSizeHandle - liftIO (Db.estimateLiveRepoSizes h) >>= \case - Just (m, annexbranchsha) -> return m - Nothing -> do - -- Db.estimateLiveRepoSizes needs the - -- reposizes to be calculated first. - m <- getRepoSizes quiet - liftIO (Db.estimateLiveRepoSizes h) >>= \case - Just (m', annexbranchsha) -> return m' - Nothing -> return m + sizemap <- getRepoSizes' quiet + go sizemap `onException` return (M.map fst sizemap) + where + go sizemap = do + h <- Db.getRepoSizeHandle + liveoffsets <- liftIO $ Db.liveRepoOffsets h + let calc u (RepoSize size, SizeOffset startoffset) = + case M.lookup u liveoffsets of + Nothing -> RepoSize size + Just (SizeOffset offset) -> RepoSize $ + size + (offset - startoffset) + return $ M.mapWithKey calc sizemap {- Fills an empty Annex.reposizes MVar with current information - from the git-annex branch, supplimented with journalled but - not yet committed information. -} -calcRepoSizes :: Bool -> MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize) +calcRepoSizes :: Bool -> MVar (Maybe (M.Map UUID (RepoSize, SizeOffset))) -> Annex (M.Map UUID (RepoSize, SizeOffset)) calcRepoSizes quiet rsv = go `onException` failed where go = do @@ -73,7 +78,7 @@ calcRepoSizes quiet rsv = go `onException` failed Just oldbranchsha -> do currbranchsha <- getBranch if oldbranchsha == currbranchsha - then calcJournalledRepoSizes oldsizemap oldbranchsha + then calcJournalledRepoSizes h oldsizemap oldbranchsha else incrementalupdate h oldsizemap oldbranchsha currbranchsha liftIO $ putMVar rsv (Just sizemap) return sizemap @@ -83,12 +88,12 @@ calcRepoSizes quiet rsv = go `onException` failed showSideAction "calculating repository sizes" (sizemap, branchsha) <- calcBranchRepoSizes liftIO $ Db.setRepoSizes h sizemap branchsha - calcJournalledRepoSizes sizemap branchsha + calcJournalledRepoSizes h sizemap branchsha incrementalupdate h oldsizemap oldbranchsha currbranchsha = do (sizemap, branchsha) <- diffBranchRepoSizes quiet oldsizemap oldbranchsha currbranchsha liftIO $ Db.setRepoSizes h sizemap branchsha - calcJournalledRepoSizes sizemap branchsha + calcJournalledRepoSizes h sizemap branchsha failed = do liftIO $ putMVar rsv (Just M.empty) @@ -120,13 +125,21 @@ calcBranchRepoSizes = do - data from journalled location logs. -} calcJournalledRepoSizes - :: M.Map UUID RepoSize + :: Db.RepoSizeHandle + -> M.Map UUID RepoSize -> Sha - -> Annex (M.Map UUID RepoSize) -calcJournalledRepoSizes startmap branchsha = - overLocationLogsJournal startmap branchsha - (\k v m -> pure (accumRepoSizes k v m)) - Nothing + -> Annex (M.Map UUID (RepoSize, SizeOffset)) +calcJournalledRepoSizes h startmap branchsha = + -- Lock the journal to prevent updates to the size offsets + -- in the repository size database while this is processing + -- the journal files. + lockJournal $ \_jl -> do + sizemap <- overLocationLogsJournal startmap branchsha + (\k v m' -> pure (accumRepoSizes k v m')) + Nothing + offsets <- liftIO $ Db.recordedRepoOffsets h + let getoffset u = fromMaybe (SizeOffset 0) $ M.lookup u offsets + return $ M.mapWithKey (\u sz -> (sz, getoffset u)) sizemap {- Incremental update by diffing. -} diffBranchRepoSizes :: Bool -> M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha) @@ -180,3 +193,22 @@ diffBranchRepoSizes quiet oldsizemap oldbranchsha newbranchsha = do (\m u -> M.insertWith (flip const) u (RepoSize 0) m) newsizemap knownuuids + +addKeyRepoSize :: Key -> Maybe RepoSize -> Maybe RepoSize +addKeyRepoSize k mrs = case mrs of + Just (RepoSize sz) -> Just $ RepoSize $ sz + ksz + Nothing -> Just $ RepoSize ksz + where + ksz = fromMaybe 0 $ fromKey keySize k + +removeKeyRepoSize :: Key -> Maybe RepoSize -> Maybe RepoSize +removeKeyRepoSize k mrs = case mrs of + Just (RepoSize sz) -> Just $ RepoSize $ sz - ksz + Nothing -> Nothing + where + ksz = fromMaybe 0 $ fromKey keySize k + +accumRepoSizes :: Key -> (S.Set UUID, S.Set UUID) -> M.Map UUID RepoSize -> M.Map UUID RepoSize +accumRepoSizes k (newlocs, removedlocs) sizemap = + let !sizemap' = foldl' (flip $ M.alter $ addKeyRepoSize k) sizemap newlocs + in foldl' (flip $ M.alter $ removeKeyRepoSize k) sizemap' removedlocs diff --git a/Annex/RepoSize/LiveUpdate.hs b/Annex/RepoSize/LiveUpdate.hs index 5d015b2585..6f27617369 100644 --- a/Annex/RepoSize/LiveUpdate.hs +++ b/Annex/RepoSize/LiveUpdate.hs @@ -31,25 +31,6 @@ updateRepoSize lu u k s = liftIO $ finishedLiveUpdate lu u k sc InfoMissing -> RemovingKey InfoDead -> RemovingKey -addKeyRepoSize :: Key -> Maybe RepoSize -> Maybe RepoSize -addKeyRepoSize k mrs = case mrs of - Just (RepoSize sz) -> Just $ RepoSize $ sz + ksz - Nothing -> Just $ RepoSize ksz - where - ksz = fromMaybe 0 $ fromKey keySize k - -removeKeyRepoSize :: Key -> Maybe RepoSize -> Maybe RepoSize -removeKeyRepoSize k mrs = case mrs of - Just (RepoSize sz) -> Just $ RepoSize $ sz - ksz - Nothing -> Nothing - where - ksz = fromMaybe 0 $ fromKey keySize k - -accumRepoSizes :: Key -> (S.Set UUID, S.Set UUID) -> M.Map UUID RepoSize -> M.Map UUID RepoSize -accumRepoSizes k (newlocs, removedlocs) sizemap = - let !sizemap' = foldl' (flip $ M.alter $ addKeyRepoSize k) sizemap newlocs - in foldl' (flip $ M.alter $ removeKeyRepoSize k) sizemap' removedlocs - -- When the UUID is Nothing, it's a live update of the local repository. prepareLiveUpdate :: Maybe UUID -> Key -> SizeChange -> Annex LiveUpdate prepareLiveUpdate mu k sc = do diff --git a/Database/RepoSize.hs b/Database/RepoSize.hs index 1247fa81ca..0b3e15fe98 100644 --- a/Database/RepoSize.hs +++ b/Database/RepoSize.hs @@ -25,10 +25,11 @@ module Database.RepoSize ( closeDb, getRepoSizes, setRepoSizes, - estimateLiveRepoSizes, startingLiveSizeChange, successfullyFinishedLiveSizeChange, removeStaleLiveSizeChange, + recordedRepoOffsets, + liveRepoOffsets, ) where import Annex.Common @@ -164,6 +165,7 @@ setRepoSizes (RepoSizeHandle (Just h)) sizemap branchcommitsha = unsetRepoSize u forM_ (M.toList sizemap) $ uncurry setRepoSize + clearRecentChanges recordAnnexBranchCommit branchcommitsha setRepoSizes (RepoSizeHandle Nothing) _ _ = noop @@ -192,15 +194,29 @@ startingLiveSizeChange (RepoSizeHandle (Just h)) u k sc sid = ] startingLiveSizeChange (RepoSizeHandle Nothing) _ _ _ _ = noop +{- A live size change has successfully finished. + - + - Update the rolling total, add as a recent change, + - and remove the live change in the same transaction. + - + - But, it's possible that the same change has been done by two + - different processes or threads. If there is a matching recent change, + - then this one is redundant, so remove it without updating the rolling + - total. + -} successfullyFinishedLiveSizeChange :: RepoSizeHandle -> UUID -> Key -> SizeChange -> SizeChangeId -> IO () successfullyFinishedLiveSizeChange (RepoSizeHandle (Just h)) u k sc sid = H.commitDb h $ do - -- Update the rolling total, add as a recent change, - -- and remove the live change in the same transaction. + getRecentChange u k >>= \case + Just sc' | sc == sc' -> remove + _ -> go + where + go = do rollingtotal <- getSizeChangeFor u setSizeChangeFor u (updateRollingTotal rollingtotal sc k) addRecentChange u k sc - removeLiveSizeChange u k sc sid + remove + remove = removeLiveSizeChange u k sc sid successfullyFinishedLiveSizeChange (RepoSizeHandle Nothing) _ _ _ _ = noop updateRollingTotal :: FileSize -> SizeChange -> Key -> FileSize @@ -231,6 +247,13 @@ getLiveSizeChanges = M.fromListWith (++) . map conv <$> selectList [] [] let LiveSizeChanges u k sid sc = entityVal entity in (u, [(k, (sc, sid))]) +getLiveSizeChanges' :: SqlPersistM [(UUID, Key, SizeChange)] +getLiveSizeChanges' = map conv <$> selectList [] [] + where + conv entity = + let LiveSizeChanges u k _sid sc = entityVal entity + in (u, k, sc) + getSizeChanges :: SqlPersistM (M.Map UUID FileSize) getSizeChanges = M.fromList . map conv <$> selectList [] [] where @@ -251,7 +274,7 @@ setSizeChangeFor u sz = (UniqueRepoRollingTotal u) (SizeChanges u sz) [SizeChangesRollingtotal =. sz] - + addRecentChange :: UUID -> Key -> SizeChange -> SqlPersistM () addRecentChange u k sc = void $ upsertBy @@ -269,19 +292,49 @@ getRecentChange u k = do (s:_) -> Just $ recentChangesChange $ entityVal s [] -> Nothing -{- Gets the sizes of Repos as of a commit to the git-annex branch - - (which is not necessarily the current commit), adjusted with all - - live changes that have happened since then or are happening now. +getRecentChanges :: SqlPersistM [(UUID, Key, SizeChange)] +getRecentChanges = map conv <$> selectList [] [] + where + conv entity = + let RecentChanges u k sc = entityVal entity + in (u, k, sc) + +{- Clears recent changes, except when there is a live change that is + - redundant with a recent change. -} +clearRecentChanges :: SqlPersistM () +clearRecentChanges = do + live <- getLiveSizeChanges' + if null live + then deleteWhere ([] :: [Filter RecentChanges]) + else do + let liveset = S.fromList live + rcs <- getRecentChanges + forM_ rcs $ \rc@(u, k, sc) -> + when (S.notMember rc liveset) $ + deleteWhere + [ RecentChangesRepo ==. u + , RecentChangesKey ==. k + , RecentChangesChange ==. sc + ] + +{- Gets the recorded offsets to sizes of Repos, not including live + - changes. -} +recordedRepoOffsets :: RepoSizeHandle -> IO (M.Map UUID SizeOffset) +recordedRepoOffsets (RepoSizeHandle (Just h)) = + M.map SizeOffset <$> H.queryDb h getSizeChanges +recordedRepoOffsets (RepoSizeHandle Nothing) = pure mempty + +{- Gets the offsets to sizes of Repos, including all live changes that + - are happening now. - - - This does not necessarily include all changes that have been journalled, + - This does not necessarily include all changes that have been made, - only ones that had startingLiveSizeChange called for them will be - - included. Also live changes or recent changes that were to a UUID not in - - the RepoSizes map are not included. + - included. - - In the unlikely case where two live changes are occurring, one - adding a key and the other removing the same key, the one - adding the key is used, in order to err on the side of a larger - - RepoSize. + - repository size. - - In the case where the same live change is recorded by two different - processes or threads, the first to complete will record it as a recent @@ -291,24 +344,14 @@ getRecentChange u k = do - This is only expensive when there are a lot of live changes happening at - the same time. -} -estimateLiveRepoSizes :: RepoSizeHandle -> IO (Maybe (M.Map UUID RepoSize, Sha)) -estimateLiveRepoSizes (RepoSizeHandle (Just h)) = H.queryDb h $ do - getAnnexBranchCommit >>= \case - Just annexbranchsha -> do - sizechanges <- getSizeChanges - livechanges <- getLiveSizeChanges - reposizes <- getRepoSizes' - m <- M.fromList <$> forM reposizes - (go sizechanges livechanges) - return (Just (m, annexbranchsha)) - Nothing -> return Nothing +liveRepoOffsets :: RepoSizeHandle -> IO (M.Map UUID SizeOffset) +liveRepoOffsets (RepoSizeHandle (Just h)) = H.queryDb h $ do + sizechanges <- getSizeChanges + livechanges <- getLiveSizeChanges + let us = nub (M.keys sizechanges ++ M.keys livechanges) + M.fromList <$> forM us (go sizechanges livechanges) where - go - :: M.Map UUID FileSize - -> M.Map UUID [(Key, (SizeChange, SizeChangeId))] - -> (UUID, RepoSize) - -> SqlPersistM (UUID, RepoSize) - go sizechanges livechanges (u, RepoSize startsize) = do + go sizechanges livechanges u = do let livechangesbykey = M.fromListWith (++) $ map (\(k, v) -> (k, [v])) $ @@ -321,18 +364,13 @@ estimateLiveRepoSizes (RepoSizeHandle (Just h)) = H.queryDb h $ do (\t (k, sc) -> updateRollingTotal t sc k) (fromMaybe 0 (M.lookup u sizechanges)) livechanges' - return (u, RepoSize (startsize + sizechange)) + return (u, SizeOffset sizechange) combinelikelivechanges = S.elems . S.fromList . map (\(k, (sc, _)) -> (k, sc)) - nonredundantlivechange - :: M.Map Key [(SizeChange, SizeChangeId)] - -> UUID - -> (Key, (SizeChange, SizeChangeId)) - -> SqlPersistM Bool nonredundantlivechange livechangesbykey u (k, (sc, cid)) | null (competinglivechanges livechangesbykey k sc cid) = getRecentChange u k >>= pure . \case @@ -340,14 +378,8 @@ estimateLiveRepoSizes (RepoSizeHandle (Just h)) = H.queryDb h $ do Just sc' -> sc /= sc' | otherwise = pure False - competinglivechanges - :: M.Map Key [(SizeChange, SizeChangeId)] - -> Key - -> SizeChange - -> SizeChangeId - -> [(SizeChange, SizeChangeId)] competinglivechanges livechangesbykey k RemovingKey cid = filter (\(sc', cid') -> cid /= cid' && sc' == AddingKey) (fromMaybe [] $ M.lookup k livechangesbykey) competinglivechanges _ _ AddingKey _ = [] -estimateLiveRepoSizes (RepoSizeHandle Nothing) = return Nothing +liveRepoOffsets (RepoSizeHandle Nothing) = pure mempty diff --git a/Types/RepoSize.hs b/Types/RepoSize.hs index b113924669..26c19e75a3 100644 --- a/Types/RepoSize.hs +++ b/Types/RepoSize.hs @@ -28,6 +28,10 @@ newtype RepoSize = RepoSize { fromRepoSize :: Integer } newtype MaxSize = MaxSize { fromMaxSize :: Integer } deriving (Show, Eq, Ord) +-- An offset to the size of a repo. +newtype SizeOffset = SizeOffset { fromSizeChange :: Integer } + deriving (Show, Eq, Ord, Num) + -- Used when an action is in progress that will change the current size of -- a repository. -- diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index 95ebc42ae6..bace31f5e7 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -35,173 +35,40 @@ Planned schedule of work: May not be a bug, needs reproducing and analysis. -* Concurrency issues with RepoSizes calculation and balanced content: +* Make sure that two threads don't check balanced preferred content at the + same time, so each thread always sees a consistent picture of what is + happening. Use locking as necessary. - * What if 2 concurrent threads are considering sending two different - keys to a repo at the same time. It can hold either but not both. - It should avoid sending both in this situation. +* When loading the live update table, check if PIDs in it are still + running (and are still git-annex), and if not, remove stale entries + from it, which can accumulate when processes are interrupted. + Note that it will be ok for the wrong git-annex process, running again + at a pid to keep a stale item in the live update table, because that + is unlikely and exponentially unlikely to happen repeatedly, so stale + information will only be used for a short time. - * There can also be a race with 2 concurrent threads where one just - finished sending to a repo, but has not yet updated the location log. - So the other one won't see an updated repo size. - - The fact that location log changes happen in CommandCleanup makes - this difficult to fix. + But then, how to check if a PID is git-annex or not? /proc of course, + but what about other OS's? Windows? - Could provisionally update Annex.reposizes before starting to send a - key, and roll it back if the send fails. But then Logs.Location - would update Annex.reposizes redundantly. So would need to remember - the provisional update was made until that is called.... But what if it - is never called for some reason? - - Also, in a race between two threads at the checking preferred content - stage, neither would have started sending yet, and so both would think - it was ok for them to. - - This race only really matters when the repo becomes full, - then the second thread will fail to send because it's full. Or will - send more than the configured maxsize. Still this would be good to - fix. - - * If all the above thread concurrency problems are fixed, separate - processes will still have concurrency problems. One case where that is - bad is a cluster accessed via ssh. Each connection to the cluster is - a separate process. So each will be unaware of changes made by others. - When `git-annex copy --to cluster -Jn` is used, this makes a single - command behave non-ideally, the same as the thread concurrency - problems. - - * Possible solution: - - Add to reposizes db a table for live updates. - Listing process ID, thread ID, UUID, key, addition or removal - (done) - - Add to reposizes db a table for sizechanges. This has for each UUID - a rolling total which is the total size changes that have accumulated - since the last update of the reposizes table. - So adding the reposizes table to sizechanges gives the current - size. - - Make checking the balanced preferred content limit record a - live update in the table (done) - - ... and use other live updates and sizechanges in making its decision - - Note: This will only work when preferred content is being checked. - If a git-annex copy without --auto is run, for example, it won't - tell other processes that it is in the process of filling up a remote. - That seems ok though, because if the user is running a command like - that, they are ok with a remote filling up. - - Make sure that two threads don't check balanced preferred content at the - same time, so each thread always sees a consistent picture of what is - happening. Use locking as necessary. - - When updating location log for a key, when there is actually a change, - update the db, remove the live update (done) and update the sizechanges - table in the same transaction (done). - - Two concurrent processes might both start the same action, eg dropping - a key, and both succeed, and so both update the location log. One needs - to update the log and the sizechanges table. The other needs to see - that it has no actual change to report, and so avoid updating the - location log (already the case) and avoid updating the sizechanges - table. (done) - - Detect when an upload (or drop) fails, and remove from the live - update table. (done) - - When loading the live update table, check if PIDs in it are still - running (and are still git-annex), and if not, remove stale entries - from it, which can accumulate when processes are interrupted. - Note that it will be ok for the wrong git-annex process, running again - at a pid to keep a stale item in the live update table, because that - is unlikely and exponentially unlikely to happen repeatedly, so stale - information will only be used for a short time. - - But then, how to check if a PID is git-annex or not? /proc of course, - but what about other OS's? Windows? - - How? Possibly have a thread that - waits on an empty MVar. Thread MVar through somehow to location log - update. (Seems this would need checking preferred content to return - the MVar? Or alternatively, the MVar could be passed into it, which - seems better..) Fill MVar on location log update. If MVar gets - GCed without being filled, the thread will get an exception and can - remove from table and cache then. This does rely on GC behavior, but if - the GC takes some time, it will just cause a failed upload to take - longer to get removed from the table and cache, which will just prevent - another upload of a different key from running immediately. - (Need to check if MVar GC behavior operates like this. - See https://stackoverflow.com/questions/10871303/killing-a-thread-when-mvar-is-garbage-collected ) - Perhaps stale entries can be found in a different way. Require the live - update table to be updated with a timestamp every 5 minutes. The thread - that waits on the MVar can do that, as long as the transfer is running. If - interrupted, it will become stale in 5 minutes, which is probably good - enough? Could do it every minute, depending on overhead. This could - also be done by just repeatedly touching a file named with the processes's - pid in it, to avoid sqlite overhead. - -* Still implementing LiveUpdate. Check for TODO XXX markers - -* Concurrency issue noted in commit db89e39df606b6ec292e0f1c3a7a60e317ac60f1 - - But: There will be a window where the redundant LiveUpdate is still - visible in the db, and processes can see it, combine it with the - rollingtotal, and arrive at the wrong size. This is a small window, but - it still ought to be addressed. Unsure if it would always be safe to - remove the redundant LiveUpdate? Consider the case where two drops and a - get are all running concurrently somehow, and the order they finish is - [drop, get, drop]. The second drop seems redundant to the first, but - it would not be safe to remove it. While this seems unlikely, it's hard - to rule out that a get and drop at different stages can both be running - at the same time. - - It also is possible for a redundant LiveUpdate to get added to the db - just after the rollingtotal was updated. In this case, combining the LiveUpdate - with the rollingtotal again yields the wrong reposize. - - So is the rollingtotal doomed to not be accurate? - - A separate table could be kept of recent updates. When combining a LiveUpdate - with the rollingtotal to get a reposize, first check if the LiveUpdate is - redundant given a recent update. When updating the RepoSizes table, clear the - recent updates table and the rolling totals table (in the same transaction). - This recent updates table could get fairly large, but only needs to be queried - for each current LiveUpdate, of which there are not ususally many running. - - When does a recent update mean a LiveUpdate is redundant? In the case of two drops, - the second is clearly redundant. But what about two gets and a drop? In this - case, after the first get, we don't know what order operations will - happen in. So the fact that the first get is in the recent updates table - should not make the second get be treated as redundant. - - So, look up each LiveUpdate in the recent updates table. When the same - operation is found there, look to see if there is any other LiveUpdate of - the same key and uuid, but with a different SizeChange. Only when there is - not is the LiveUpdate redundant. - - What if the recent updates table contains a get and a drop of the same - key. Now a get is running. Is it redundant? Perhaps the recent updates - table needs timestamps. More simply, when adding a drop to the recent - updates table, any existing get of the same key should be removed. - -* In the case where a copy to a remote fails (due eg to annex.diskreserve), - the LiveUpdate thread can not get a chance to catch its exception when - the LiveUpdate is gced, before git-annex exits. In this case, the - database is left with some stale entries in the live update table. - - This is not a big problem because the same can happen when the process is - interrupted. Still it would be cleaner for this not to happen. Is there - any way to prevent it? Waiting 1 GC tick before exiting would do it, - I'd think, but I tried manually doing a performGC at git-annex shutdown - and it didn't help. - - getLiveRepoSizes is an unfinished try at implementing the above. - -* Something needs to empty SizeChanges and RecentChanges when - setRepoSizes is called. While avoiding races. + How? Possibly have a thread that + waits on an empty MVar. Thread MVar through somehow to location log + update. (Seems this would need checking preferred content to return + the MVar? Or alternatively, the MVar could be passed into it, which + seems better..) Fill MVar on location log update. If MVar gets + GCed without being filled, the thread will get an exception and can + remove from table and cache then. This does rely on GC behavior, but if + the GC takes some time, it will just cause a failed upload to take + longer to get removed from the table and cache, which will just prevent + another upload of a different key from running immediately. + (Need to check if MVar GC behavior operates like this. + See https://stackoverflow.com/questions/10871303/killing-a-thread-when-mvar-is-garbage-collected ) + Perhaps stale entries can be found in a different way. Require the live + update table to be updated with a timestamp every 5 minutes. The thread + that waits on the MVar can do that, as long as the transfer is running. If + interrupted, it will become stale in 5 minutes, which is probably good + enough? Could do it every minute, depending on overhead. This could + also be done by just repeatedly touching a file named with the processes's + pid in it, to avoid sqlite overhead. * The assistant is using NoLiveUpdate, but it should be posssible to plumb a LiveUpdate through it from preferred content checking to location log @@ -222,6 +89,7 @@ Planned schedule of work: * Balanced preferred content basic implementation, including --rebalance option. * Implemented [[track_free_space_in_repos_via_git-annex_branch]] +* Implemented tracking of live changes to repository sizes. * `git-annex maxsize` * annex.fullybalancedthreshhold