finalize RepoSize database

Including locking on creation, handling of permissions errors, and
setting repo sizes.

I'm confident that locking is not needed while using this database.
Since writes happen in a single transaction. When there are two writers
that are recording sizes based on different git-annex branch commits,
one will overwrite what the other one recorded. Which is fine, it's only
necessary that the database stays consistent with the content of a
git-annex branch commit.
This commit is contained in:
Joey Hess 2024-08-15 11:50:01 -04:00
parent 63a3cedc45
commit eac4e9391b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 86 additions and 37 deletions

View file

@ -76,6 +76,7 @@ module Annex.Locations (
gitAnnexImportFeedDbDir, gitAnnexImportFeedDbDir,
gitAnnexImportFeedDbLock, gitAnnexImportFeedDbLock,
gitAnnexRepoSizeDbDir, gitAnnexRepoSizeDbDir,
gitAnnexRepoSizeDbLock,
gitAnnexScheduleState, gitAnnexScheduleState,
gitAnnexTransferDir, gitAnnexTransferDir,
gitAnnexCredsDir, gitAnnexCredsDir,
@ -521,6 +522,10 @@ gitAnnexRepoSizeDbDir :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexRepoSizeDbDir r c = gitAnnexRepoSizeDbDir r c =
fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "reposize" fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "reposize"
{- Lock file for the reposize database. -}
gitAnnexRepoSizeDbLock :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexRepoSizeDbLock r c = gitAnnexRepoSizeDbDir r c <> ".lck"
{- .git/annex/schedulestate is used to store information about when {- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -} - scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> RawFilePath gitAnnexScheduleState :: Git.Repo -> RawFilePath

View file

@ -22,17 +22,18 @@ module Database.RepoSize (
RepoSizeHandle, RepoSizeHandle,
openDb, openDb,
closeDb, closeDb,
getRepoSizes, getRepoSizes,
setRepoSize, setRepoSizes,
updateRepoSize,
) where ) where
import Annex.Common
import Annex.LockFile
import Types.RepoSize import Types.RepoSize
import Database.Types () import Git.Types
import qualified Database.Queue as H import qualified Database.Queue as H
import Database.Init import Database.Init
import Annex.Locations import Database.Utility
import Annex.Common import Database.Types
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
import Database.Persist.Sql hiding (Key) import Database.Persist.Sql hiding (Key)
@ -40,62 +41,107 @@ import Database.Persist.TH
import qualified System.FilePath.ByteString as P import qualified System.FilePath.ByteString as P
import qualified Data.Map as M import qualified Data.Map as M
newtype RepoSizeHandle = RepoSizeHandle H.DbQueue newtype RepoSizeHandle = RepoSizeHandle (Maybe H.DbQueue)
share [mkPersist sqlSettings, mkMigrate "migrateRepoSizes"] [persistLowerCase| share [mkPersist sqlSettings, mkMigrate "migrateRepoSizes"] [persistLowerCase|
-- Corresponds to location log information from the git-annex branch.
RepoSizes RepoSizes
repo UUID repo UUID
size Integer size Integer
UniqueRepo repo UniqueRepo repo
-- The last git-annex branch commit that was used to update RepoSizes.
AnnexBranch
commit SSha
UniqueCommit commit
|] |]
{- Opens the database, creating it if it doesn't exist yet. {- Opens the database, creating it if it doesn't exist yet.
- -
- No locking is done by this, so caller must prevent multiple processes - Multiple readers and writers can have the database open at the same
- running this at the same time. - time. Database.Handle deals with the concurrency issues.
- The lock is held while opening the database, so that when
- the database doesn't exist yet, one caller wins the lock and
- can create it undisturbed.
-} -}
openDb :: Annex RepoSizeHandle openDb :: Annex RepoSizeHandle
openDb = do openDb = do
dbdir <- calcRepo' gitAnnexRepoSizeDbDir lck <- calcRepo' gitAnnexRepoSizeDbLock
let db = dbdir P.</> "db" catchPermissionDenied permerr $ withExclusiveLock lck $ do
unlessM (liftIO $ R.doesPathExist db) $ do dbdir <- calcRepo' gitAnnexRepoSizeDbDir
initDb db $ void $ let db = dbdir P.</> "db"
runMigrationSilent migrateRepoSizes unlessM (liftIO $ R.doesPathExist db) $ do
h <- liftIO $ H.openDbQueue db "reposizes" initDb db $ void $
return $ RepoSizeHandle h runMigrationSilent migrateRepoSizes
h <- liftIO $ H.openDbQueue db "repo_sizes"
return $ RepoSizeHandle (Just h)
where
-- If permissions don't allow opening the database,
-- just don't use it. Since this database is just a cache
-- of information available in the git-annex branch, the same
-- information can be queried from the branch, though much less
-- efficiently.
permerr _e = return (RepoSizeHandle Nothing)
closeDb :: RepoSizeHandle -> Annex () closeDb :: RepoSizeHandle -> Annex ()
closeDb (RepoSizeHandle h) = liftIO $ H.closeDbQueue h closeDb (RepoSizeHandle (Just h)) = liftIO $ H.closeDbQueue h
closeDb (RepoSizeHandle Nothing) = noop
{- Doesn't see changes that were just made with setRepoSize or getRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize, Maybe Sha)
- updateRepoSize before flushing the queue. -} getRepoSizes (RepoSizeHandle (Just h)) = H.queryDbQueue h $ do
getRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize) sizemap <- M.fromList . map conv <$> getRepoSizes'
getRepoSizes (RepoSizeHandle h) = H.queryDbQueue h $ annexbranchsha <- getAnnexBranchCommit
M.fromList . map conv <$> getRepoSizes' return (sizemap, annexbranchsha)
where where
conv entity = conv entity =
let RepoSizes u sz = entityVal entity let RepoSizes u sz = entityVal entity
in (u, RepoSize sz) in (u, RepoSize sz)
getRepoSizes (RepoSizeHandle Nothing) = return (mempty, Nothing)
getRepoSizes' :: SqlPersistM [Entity RepoSizes] getRepoSizes' :: SqlPersistM [Entity RepoSizes]
getRepoSizes' = selectList [] [] getRepoSizes' = selectList [] []
setRepoSize :: UUID -> RepoSize -> RepoSizeHandle -> IO () getAnnexBranchCommit :: SqlPersistM (Maybe Sha)
setRepoSize u (RepoSize sz) (RepoSizeHandle h) = H.queueDb h checkCommit $ getAnnexBranchCommit = do
l <- selectList ([] :: [Filter AnnexBranch]) []
case l of
(s:[]) -> return $ Just $ fromSSha $
annexBranchCommit $ entityVal s
_ -> return Nothing
{- Updates the recorded sizes of all repositories.
-
- This can be called without locking since the update runs in a single
- transaction.
-
- Any repositories that are not in the provided map, but do have a size
- recorded in the database will have it cleared. This is unlikely to
- happen, but ensures that the database is consistent.
-}
setRepoSizes :: RepoSizeHandle -> M.Map UUID RepoSize -> Sha -> IO ()
setRepoSizes (RepoSizeHandle (Just h)) sizemap branchcommitsha =
H.queueDb h commitimmediately $ do
l <- getRepoSizes'
forM_ (map entityVal l) $ \(RepoSizes u _) ->
unless (M.member u sizemap) $
unsetRepoSize u
forM_ (M.toList sizemap) $
uncurry setRepoSize
recordAnnexBranchCommit branchcommitsha
where
commitimmediately _ _ = pure True
setRepoSizes (RepoSizeHandle Nothing) _ _ = noop
setRepoSize :: UUID -> RepoSize -> SqlPersistM ()
setRepoSize u (RepoSize sz) =
void $ upsertBy void $ upsertBy
(UniqueRepo u) (UniqueRepo u)
(RepoSizes u sz) (RepoSizes u sz)
[RepoSizesSize =. sz] [RepoSizesSize =. sz]
{- Applies an offset to the size. If no size is recorded for the repo, does unsetRepoSize :: UUID -> SqlPersistM ()
- nothing. -} unsetRepoSize u = deleteWhere [RepoSizesRepo ==. u]
updateRepoSize :: UUID -> Integer -> RepoSizeHandle -> IO ()
updateRepoSize u offset (RepoSizeHandle h) = H.queueDb h checkCommit $
void $ updateWhere
[RepoSizesRepo ==. u]
[RepoSizesSize +=. offset]
checkCommit :: H.QueueSize -> H.LastCommitTime -> IO Bool recordAnnexBranchCommit :: Sha -> SqlPersistM ()
checkCommit sz _lastcommittime recordAnnexBranchCommit branchcommitsha = do
| sz > 1000 = return True deleteWhere ([] :: [Filter AnnexBranch])
| otherwise = return False void $ insertUniqueFast $ AnnexBranch $ toSSha branchcommitsha

View file

@ -55,8 +55,6 @@ Planned schedule of work:
* Goal is for limitFullyBalanced not to need to calcRepoSizes. * Goal is for limitFullyBalanced not to need to calcRepoSizes.
* Add git-annex branch sha to Database.RepoSizes.
* When Annex.reposizes does not list the size of a UUID, and * When Annex.reposizes does not list the size of a UUID, and
that UUID's size is needed eg for balanced preferred that UUID's size is needed eg for balanced preferred
content, use calcRepoSizes and store in content, use calcRepoSizes and store in