2024-08-12 15:19:58 +00:00
|
|
|
{- Sqlite database used to track the sizes of repositories.
|
|
|
|
-
|
|
|
|
- Copyright 2024 Joey Hess <id@joeyh.name>
|
|
|
|
-:
|
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
{-# LANGUAGE CPP #-}
|
|
|
|
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
|
|
|
|
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
|
|
|
|
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
|
|
|
|
{-# LANGUAGE DataKinds, FlexibleInstances #-}
|
|
|
|
{-# LANGUAGE RankNTypes #-}
|
|
|
|
{-# LANGUAGE UndecidableInstances #-}
|
|
|
|
{-# LANGUAGE TypeOperators #-}
|
|
|
|
#if MIN_VERSION_persistent_template(2,8,0)
|
|
|
|
{-# LANGUAGE DerivingStrategies #-}
|
|
|
|
{-# LANGUAGE StandaloneDeriving #-}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
module Database.RepoSize (
|
|
|
|
RepoSizeHandle,
|
2024-08-23 20:35:12 +00:00
|
|
|
getRepoSizeHandle,
|
2024-08-12 15:19:58 +00:00
|
|
|
openDb,
|
|
|
|
closeDb,
|
2024-08-15 15:50:01 +00:00
|
|
|
getRepoSizes,
|
|
|
|
setRepoSizes,
|
2024-08-26 18:50:09 +00:00
|
|
|
getLiveRepoSizes,
|
2024-08-23 16:51:00 +00:00
|
|
|
startingLiveSizeChange,
|
2024-08-25 14:34:47 +00:00
|
|
|
successfullyFinishedLiveSizeChange,
|
2024-08-26 18:50:09 +00:00
|
|
|
removeStaleLiveSizeChange,
|
2024-08-12 15:19:58 +00:00
|
|
|
) where
|
|
|
|
|
2024-08-15 15:50:01 +00:00
|
|
|
import Annex.Common
|
2024-08-23 20:35:12 +00:00
|
|
|
import qualified Annex
|
|
|
|
import Database.RepoSize.Handle
|
2024-08-15 16:31:27 +00:00
|
|
|
import qualified Database.Handle as H
|
2024-08-12 15:19:58 +00:00
|
|
|
import Database.Init
|
2024-08-15 15:50:01 +00:00
|
|
|
import Database.Utility
|
|
|
|
import Database.Types
|
2024-08-23 20:35:12 +00:00
|
|
|
import Annex.LockFile
|
|
|
|
import Git.Types
|
2024-08-12 15:19:58 +00:00
|
|
|
import qualified Utility.RawFilePath as R
|
|
|
|
|
|
|
|
import Database.Persist.Sql hiding (Key)
|
|
|
|
import Database.Persist.TH
|
|
|
|
import qualified System.FilePath.ByteString as P
|
2024-08-26 18:50:09 +00:00
|
|
|
import qualified Data.Map.Strict as M
|
|
|
|
import qualified Data.Set as S
|
2024-08-12 15:19:58 +00:00
|
|
|
|
|
|
|
share [mkPersist sqlSettings, mkMigrate "migrateRepoSizes"] [persistLowerCase|
|
2024-08-15 15:50:01 +00:00
|
|
|
-- Corresponds to location log information from the git-annex branch.
|
2024-08-12 15:19:58 +00:00
|
|
|
RepoSizes
|
|
|
|
repo UUID
|
2024-08-25 12:22:40 +00:00
|
|
|
size FileSize
|
2024-08-12 15:19:58 +00:00
|
|
|
UniqueRepo repo
|
2024-08-15 15:50:01 +00:00
|
|
|
-- The last git-annex branch commit that was used to update RepoSizes.
|
|
|
|
AnnexBranch
|
|
|
|
commit SSha
|
|
|
|
UniqueCommit commit
|
2024-08-23 16:51:00 +00:00
|
|
|
-- Changes that are currently being made that affect repo sizes.
|
2024-08-25 14:34:47 +00:00
|
|
|
-- (Only updated when preferred content expressions are in use that need
|
|
|
|
-- live size changes.)
|
2024-08-23 16:51:00 +00:00
|
|
|
LiveSizeChanges
|
|
|
|
repo UUID
|
|
|
|
key Key
|
2024-08-25 14:34:47 +00:00
|
|
|
changeid SizeChangeId
|
2024-08-23 16:51:00 +00:00
|
|
|
change SizeChange
|
2024-08-25 14:34:47 +00:00
|
|
|
UniqueLiveSizeChange repo key changeid
|
|
|
|
-- A rolling total of size changes that were removed from LiveSizeChanges
|
|
|
|
-- upon successful completion.
|
|
|
|
SizeChanges
|
|
|
|
repo UUID
|
|
|
|
rollingtotal FileSize
|
|
|
|
UniqueRepoRollingTotal repo
|
2024-08-26 18:50:09 +00:00
|
|
|
-- The most recent size changes that were removed from LiveSizeChanges
|
|
|
|
-- upon successful completion.
|
|
|
|
RecentChanges
|
|
|
|
repo UUID
|
|
|
|
key Key
|
|
|
|
change SizeChange
|
|
|
|
UniqueRecentChange repo key
|
2024-08-12 15:19:58 +00:00
|
|
|
|]
|
|
|
|
|
2024-08-23 20:35:12 +00:00
|
|
|
{- Gets a handle to the database. It's cached in Annex state. -}
|
|
|
|
getRepoSizeHandle :: Annex RepoSizeHandle
|
|
|
|
getRepoSizeHandle = Annex.getState Annex.reposizehandle >>= \case
|
|
|
|
Just h -> return h
|
|
|
|
Nothing -> do
|
|
|
|
h <- openDb
|
|
|
|
Annex.changeState $ \s -> s { Annex.reposizehandle = Just h }
|
|
|
|
return h
|
|
|
|
|
2024-08-12 15:19:58 +00:00
|
|
|
{- Opens the database, creating it if it doesn't exist yet.
|
|
|
|
-
|
2024-08-15 15:50:01 +00:00
|
|
|
- Multiple readers and writers can have the database open at the same
|
|
|
|
- time. Database.Handle deals with the concurrency issues.
|
|
|
|
- The lock is held while opening the database, so that when
|
|
|
|
- the database doesn't exist yet, one caller wins the lock and
|
|
|
|
- can create it undisturbed.
|
2024-08-12 15:19:58 +00:00
|
|
|
-}
|
|
|
|
openDb :: Annex RepoSizeHandle
|
|
|
|
openDb = do
|
2024-08-15 15:50:01 +00:00
|
|
|
lck <- calcRepo' gitAnnexRepoSizeDbLock
|
|
|
|
catchPermissionDenied permerr $ withExclusiveLock lck $ do
|
|
|
|
dbdir <- calcRepo' gitAnnexRepoSizeDbDir
|
|
|
|
let db = dbdir P.</> "db"
|
|
|
|
unlessM (liftIO $ R.doesPathExist db) $ do
|
|
|
|
initDb db $ void $
|
|
|
|
runMigrationSilent migrateRepoSizes
|
2024-08-15 16:31:27 +00:00
|
|
|
h <- liftIO $ H.openDb db "repo_sizes"
|
2024-08-15 15:50:01 +00:00
|
|
|
return $ RepoSizeHandle (Just h)
|
|
|
|
where
|
|
|
|
-- If permissions don't allow opening the database,
|
|
|
|
-- just don't use it. Since this database is just a cache
|
|
|
|
-- of information available in the git-annex branch, the same
|
|
|
|
-- information can be queried from the branch, though much less
|
|
|
|
-- efficiently.
|
|
|
|
permerr _e = return (RepoSizeHandle Nothing)
|
2024-08-12 15:19:58 +00:00
|
|
|
|
|
|
|
closeDb :: RepoSizeHandle -> Annex ()
|
2024-08-15 16:31:27 +00:00
|
|
|
closeDb (RepoSizeHandle (Just h)) = liftIO $ H.closeDb h
|
2024-08-15 15:50:01 +00:00
|
|
|
closeDb (RepoSizeHandle Nothing) = noop
|
2024-08-12 15:19:58 +00:00
|
|
|
|
2024-08-26 18:50:09 +00:00
|
|
|
{- Gets the sizes of repositories as of a commit to the git-annex
|
|
|
|
- branch. -}
|
2024-08-15 15:50:01 +00:00
|
|
|
getRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize, Maybe Sha)
|
2024-08-15 16:31:27 +00:00
|
|
|
getRepoSizes (RepoSizeHandle (Just h)) = H.queryDb h $ do
|
2024-08-26 18:50:09 +00:00
|
|
|
sizemap <- M.fromList <$> getRepoSizes'
|
2024-08-15 15:50:01 +00:00
|
|
|
annexbranchsha <- getAnnexBranchCommit
|
|
|
|
return (sizemap, annexbranchsha)
|
2024-08-26 18:50:09 +00:00
|
|
|
getRepoSizes (RepoSizeHandle Nothing) = return (mempty, Nothing)
|
|
|
|
|
|
|
|
getRepoSizes' :: SqlPersistM [(UUID, RepoSize)]
|
|
|
|
getRepoSizes' = map conv <$> selectList [] []
|
2024-08-12 15:19:58 +00:00
|
|
|
where
|
|
|
|
conv entity =
|
|
|
|
let RepoSizes u sz = entityVal entity
|
|
|
|
in (u, RepoSize sz)
|
|
|
|
|
2024-08-15 15:50:01 +00:00
|
|
|
getAnnexBranchCommit :: SqlPersistM (Maybe Sha)
|
|
|
|
getAnnexBranchCommit = do
|
|
|
|
l <- selectList ([] :: [Filter AnnexBranch]) []
|
|
|
|
case l of
|
|
|
|
(s:[]) -> return $ Just $ fromSSha $
|
|
|
|
annexBranchCommit $ entityVal s
|
|
|
|
_ -> return Nothing
|
|
|
|
|
|
|
|
{- Updates the recorded sizes of all repositories.
|
|
|
|
-
|
|
|
|
- This can be called without locking since the update runs in a single
|
|
|
|
- transaction.
|
|
|
|
-
|
|
|
|
- Any repositories that are not in the provided map, but do have a size
|
|
|
|
- recorded in the database will have it cleared. This is unlikely to
|
|
|
|
- happen, but ensures that the database is consistent.
|
|
|
|
-}
|
|
|
|
setRepoSizes :: RepoSizeHandle -> M.Map UUID RepoSize -> Sha -> IO ()
|
|
|
|
setRepoSizes (RepoSizeHandle (Just h)) sizemap branchcommitsha =
|
2024-08-15 16:31:27 +00:00
|
|
|
H.commitDb h $ do
|
2024-08-26 18:50:09 +00:00
|
|
|
l <- getRepoSizes'
|
|
|
|
forM_ (map fst l) $ \u ->
|
2024-08-15 15:50:01 +00:00
|
|
|
unless (M.member u sizemap) $
|
|
|
|
unsetRepoSize u
|
|
|
|
forM_ (M.toList sizemap) $
|
|
|
|
uncurry setRepoSize
|
|
|
|
recordAnnexBranchCommit branchcommitsha
|
|
|
|
setRepoSizes (RepoSizeHandle Nothing) _ _ = noop
|
|
|
|
|
|
|
|
setRepoSize :: UUID -> RepoSize -> SqlPersistM ()
|
|
|
|
setRepoSize u (RepoSize sz) =
|
2024-08-12 15:19:58 +00:00
|
|
|
void $ upsertBy
|
|
|
|
(UniqueRepo u)
|
|
|
|
(RepoSizes u sz)
|
|
|
|
[RepoSizesSize =. sz]
|
|
|
|
|
2024-08-15 15:50:01 +00:00
|
|
|
unsetRepoSize :: UUID -> SqlPersistM ()
|
|
|
|
unsetRepoSize u = deleteWhere [RepoSizesRepo ==. u]
|
2024-08-12 15:19:58 +00:00
|
|
|
|
2024-08-15 15:50:01 +00:00
|
|
|
recordAnnexBranchCommit :: Sha -> SqlPersistM ()
|
|
|
|
recordAnnexBranchCommit branchcommitsha = do
|
|
|
|
deleteWhere ([] :: [Filter AnnexBranch])
|
|
|
|
void $ insertUniqueFast $ AnnexBranch $ toSSha branchcommitsha
|
2024-08-23 16:51:00 +00:00
|
|
|
|
2024-08-25 14:34:47 +00:00
|
|
|
startingLiveSizeChange :: RepoSizeHandle -> UUID -> Key -> SizeChange -> SizeChangeId -> IO ()
|
|
|
|
startingLiveSizeChange (RepoSizeHandle (Just h)) u k sc sid =
|
2024-08-23 20:35:12 +00:00
|
|
|
H.commitDb h $ void $ upsertBy
|
2024-08-25 14:34:47 +00:00
|
|
|
(UniqueLiveSizeChange u k sid)
|
|
|
|
(LiveSizeChanges u k sid sc)
|
|
|
|
[ LiveSizeChangesChange =. sc
|
|
|
|
, LiveSizeChangesChangeid =. sid
|
|
|
|
]
|
|
|
|
startingLiveSizeChange (RepoSizeHandle Nothing) _ _ _ _ = noop
|
|
|
|
|
|
|
|
successfullyFinishedLiveSizeChange :: RepoSizeHandle -> UUID -> Key -> SizeChange -> SizeChangeId -> IO ()
|
|
|
|
successfullyFinishedLiveSizeChange (RepoSizeHandle (Just h)) u k sc sid =
|
|
|
|
H.commitDb h $ do
|
2024-08-26 18:50:09 +00:00
|
|
|
-- Update the rolling total, add as a recent change,
|
|
|
|
-- and remove the live change in the same transaction.
|
2024-08-25 14:34:47 +00:00
|
|
|
rollingtotal <- getSizeChangeFor u
|
2024-08-26 18:50:09 +00:00
|
|
|
setSizeChangeFor u (updateRollingTotal rollingtotal sc k)
|
|
|
|
addRecentChange u k sc
|
2024-08-25 14:34:47 +00:00
|
|
|
removeLiveSizeChange u k sc sid
|
|
|
|
where
|
|
|
|
updaterollingtotal t = case sc of
|
|
|
|
AddingKey -> t + ksz
|
|
|
|
RemovingKey -> t - ksz
|
|
|
|
ksz = fromMaybe 0 $ fromKey keySize k
|
|
|
|
successfullyFinishedLiveSizeChange (RepoSizeHandle Nothing) _ _ _ _ = noop
|
|
|
|
|
2024-08-26 18:50:09 +00:00
|
|
|
updateRollingTotal :: FileSize -> SizeChange -> Key -> FileSize
|
|
|
|
updateRollingTotal t sc k = case sc of
|
|
|
|
AddingKey -> t + ksz
|
|
|
|
RemovingKey -> t - ksz
|
|
|
|
where
|
|
|
|
ksz = fromMaybe 0 $ fromKey keySize k
|
|
|
|
|
|
|
|
removeStaleLiveSizeChange :: RepoSizeHandle -> UUID -> Key -> SizeChange -> SizeChangeId -> IO ()
|
|
|
|
removeStaleLiveSizeChange (RepoSizeHandle (Just h)) u k sc sid =
|
2024-08-25 14:34:47 +00:00
|
|
|
H.commitDb h $ removeLiveSizeChange u k sc sid
|
2024-08-26 18:50:09 +00:00
|
|
|
removeStaleLiveSizeChange (RepoSizeHandle Nothing) _ _ _ _ = noop
|
2024-08-25 14:34:47 +00:00
|
|
|
|
|
|
|
removeLiveSizeChange :: UUID -> Key -> SizeChange -> SizeChangeId -> SqlPersistM ()
|
|
|
|
removeLiveSizeChange u k sc sid =
|
|
|
|
deleteWhere
|
2024-08-23 20:35:12 +00:00
|
|
|
[ LiveSizeChangesRepo ==. u
|
|
|
|
, LiveSizeChangesKey ==. k
|
2024-08-25 14:34:47 +00:00
|
|
|
, LiveSizeChangesChangeid ==. sid
|
2024-08-23 20:35:12 +00:00
|
|
|
, LiveSizeChangesChange ==. sc
|
|
|
|
]
|
2024-08-23 16:51:00 +00:00
|
|
|
|
2024-08-26 18:50:09 +00:00
|
|
|
getLiveSizeChanges :: SqlPersistM (M.Map UUID [(Key, (SizeChange, SizeChangeId))])
|
|
|
|
getLiveSizeChanges = M.fromListWith (++) . map conv <$> selectList [] []
|
2024-08-23 16:51:00 +00:00
|
|
|
where
|
|
|
|
conv entity =
|
2024-08-25 14:34:47 +00:00
|
|
|
let LiveSizeChanges u k sid sc = entityVal entity
|
2024-08-26 18:50:09 +00:00
|
|
|
in (u, [(k, (sc, sid))])
|
2024-08-25 14:34:47 +00:00
|
|
|
|
2024-08-26 18:50:09 +00:00
|
|
|
getSizeChanges :: SqlPersistM (M.Map UUID FileSize)
|
|
|
|
getSizeChanges = M.fromList . map conv <$> selectList [] []
|
2024-08-25 14:34:47 +00:00
|
|
|
where
|
|
|
|
conv entity =
|
|
|
|
let SizeChanges u n = entityVal entity
|
|
|
|
in (u, n)
|
|
|
|
|
|
|
|
getSizeChangeFor :: UUID -> SqlPersistM FileSize
|
|
|
|
getSizeChangeFor u = do
|
|
|
|
l <- selectList [SizeChangesRepo ==. u] []
|
|
|
|
return $ case l of
|
|
|
|
(s:_) -> sizeChangesRollingtotal $ entityVal s
|
|
|
|
[] -> 0
|
|
|
|
|
|
|
|
setSizeChangeFor :: UUID -> FileSize -> SqlPersistM ()
|
|
|
|
setSizeChangeFor u sz =
|
|
|
|
void $ upsertBy
|
|
|
|
(UniqueRepoRollingTotal u)
|
|
|
|
(SizeChanges u sz)
|
|
|
|
[SizeChangesRollingtotal =. sz]
|
2024-08-26 18:50:09 +00:00
|
|
|
|
|
|
|
addRecentChange :: UUID -> Key -> SizeChange -> SqlPersistM ()
|
|
|
|
addRecentChange u k sc =
|
|
|
|
void $ upsertBy
|
|
|
|
(UniqueRecentChange u k)
|
|
|
|
(RecentChanges u k sc)
|
|
|
|
[RecentChangesChange =. sc]
|
|
|
|
|
|
|
|
getRecentChange :: UUID -> Key -> SqlPersistM (Maybe SizeChange)
|
|
|
|
getRecentChange u k = do
|
|
|
|
l <- selectList
|
|
|
|
[ RecentChangesRepo ==. u
|
|
|
|
, RecentChangesKey ==. k
|
|
|
|
] []
|
|
|
|
return $ case l of
|
|
|
|
(s:_) -> Just $ recentChangesChange $ entityVal s
|
|
|
|
[] -> Nothing
|
|
|
|
|
|
|
|
{- Gets the sizes of Repos as of a commit to the git-annex branch
|
|
|
|
- (which is not necessarily the current commit), adjusted with all
|
|
|
|
- live changes that have happened since then or are happening now.
|
|
|
|
-
|
|
|
|
- This does not necessarily include all changes that have been journalled,
|
|
|
|
- only ones that had startingLiveSizeChange called for them will be
|
|
|
|
- included. Also live changes or recent changes that were to a UUID not in
|
|
|
|
- the RepoSizes map are not included.
|
|
|
|
-
|
|
|
|
- In the unlikely case where two live changes are occurring, one
|
|
|
|
- adding a key and the other removing the same key, the one
|
|
|
|
- adding the key is used, in order to err on the side of a larger
|
|
|
|
- RepoSize.
|
|
|
|
-
|
|
|
|
- Omits live changes that are redundant due to a recent change already
|
|
|
|
- being recorded for the same change.
|
|
|
|
-
|
|
|
|
- This is only expensive when there are a lot of live changes happening at
|
|
|
|
- the same time.
|
|
|
|
-}
|
|
|
|
getLiveRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize, Maybe Sha)
|
|
|
|
getLiveRepoSizes (RepoSizeHandle (Just h)) = H.queryDb h $ do
|
|
|
|
sizechanges <- getSizeChanges
|
|
|
|
livechanges <- getLiveSizeChanges
|
|
|
|
reposizes <- getRepoSizes'
|
|
|
|
annexbranchsha <- getAnnexBranchCommit
|
|
|
|
m <- M.fromList <$> forM reposizes (go sizechanges livechanges)
|
|
|
|
return (m, annexbranchsha)
|
|
|
|
where
|
|
|
|
go
|
|
|
|
:: M.Map UUID FileSize
|
|
|
|
-> M.Map UUID [(Key, (SizeChange, SizeChangeId))]
|
|
|
|
-> (UUID, RepoSize)
|
|
|
|
-> SqlPersistM (UUID, RepoSize)
|
|
|
|
go sizechanges livechanges (u, RepoSize startsize) = do
|
|
|
|
let livechangesbykey =
|
|
|
|
M.fromListWith (++) $ maybe [] (\v -> [v]) $
|
|
|
|
M.lookup u livechanges
|
|
|
|
livechanges' <- combinelikelivechanges <$>
|
|
|
|
filterM (nonredundantlivechange livechangesbykey u)
|
|
|
|
(fromMaybe [] $ M.lookup u livechanges)
|
|
|
|
let sizechange = foldl'
|
|
|
|
(\t (k, sc) -> updateRollingTotal t sc k)
|
|
|
|
(fromMaybe 0 (M.lookup u sizechanges))
|
|
|
|
livechanges'
|
|
|
|
return (u, RepoSize (startsize + sizechange))
|
|
|
|
|
|
|
|
combinelikelivechanges =
|
|
|
|
S.elems
|
|
|
|
. S.fromList
|
|
|
|
. map (\(k, (sc, _)) -> (k, sc))
|
|
|
|
|
|
|
|
nonredundantlivechange
|
|
|
|
:: M.Map Key [(SizeChange, SizeChangeId)]
|
|
|
|
-> UUID
|
|
|
|
-> (Key, (SizeChange, SizeChangeId))
|
|
|
|
-> SqlPersistM Bool
|
|
|
|
nonredundantlivechange livechangesbykey u (k, (sc, cid))
|
|
|
|
| null (competinglivechanges livechangesbykey k sc cid) =
|
|
|
|
getRecentChange u k >>= pure . \case
|
|
|
|
Nothing -> True
|
|
|
|
Just sc' -> sc /= sc'
|
|
|
|
| otherwise = pure False
|
|
|
|
|
|
|
|
competinglivechanges
|
|
|
|
:: M.Map Key [(SizeChange, SizeChangeId)]
|
|
|
|
-> Key
|
|
|
|
-> SizeChange
|
|
|
|
-> SizeChangeId
|
|
|
|
-> [(SizeChange, SizeChangeId)]
|
|
|
|
competinglivechanges livechangesbykey k RemovingKey cid =
|
|
|
|
filter (\(sc', cid') -> cid /= cid' && sc' == AddingKey)
|
|
|
|
(fromMaybe [] $ M.lookup k livechangesbykey)
|
|
|
|
competinglivechanges _ _ AddingKey _ = []
|
|
|
|
getLiveRepoSizes (RepoSizeHandle Nothing) = return mempty
|