finalize RepoSize database

Including locking on creation, handling of permissions errors, and
setting repo sizes.

I'm confident that locking is not needed while using this database.
Since writes happen in a single transaction. When there are two writers
that are recording sizes based on different git-annex branch commits,
one will overwrite what the other one recorded. Which is fine, it's only
necessary that the database stays consistent with the content of a
git-annex branch commit.
This commit is contained in:
Joey Hess 2024-08-15 11:50:01 -04:00
parent 63a3cedc45
commit eac4e9391b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 86 additions and 37 deletions

View file

@ -76,6 +76,7 @@ module Annex.Locations (
gitAnnexImportFeedDbDir,
gitAnnexImportFeedDbLock,
gitAnnexRepoSizeDbDir,
gitAnnexRepoSizeDbLock,
gitAnnexScheduleState,
gitAnnexTransferDir,
gitAnnexCredsDir,
@ -521,6 +522,10 @@ gitAnnexRepoSizeDbDir :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexRepoSizeDbDir r c =
fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "reposize"
{- Lock file for the reposize database. -}
gitAnnexRepoSizeDbLock :: Git.Repo -> GitConfig -> RawFilePath
gitAnnexRepoSizeDbLock r c = gitAnnexRepoSizeDbDir r c <> ".lck"
{- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> RawFilePath

View file

@ -22,17 +22,18 @@ module Database.RepoSize (
RepoSizeHandle,
openDb,
closeDb,
getRepoSizes,
setRepoSize,
updateRepoSize,
getRepoSizes,
setRepoSizes,
) where
import Annex.Common
import Annex.LockFile
import Types.RepoSize
import Database.Types ()
import Git.Types
import qualified Database.Queue as H
import Database.Init
import Annex.Locations
import Annex.Common
import Database.Utility
import Database.Types
import qualified Utility.RawFilePath as R
import Database.Persist.Sql hiding (Key)
@ -40,62 +41,107 @@ import Database.Persist.TH
import qualified System.FilePath.ByteString as P
import qualified Data.Map as M
newtype RepoSizeHandle = RepoSizeHandle H.DbQueue
newtype RepoSizeHandle = RepoSizeHandle (Maybe H.DbQueue)
share [mkPersist sqlSettings, mkMigrate "migrateRepoSizes"] [persistLowerCase|
-- Corresponds to location log information from the git-annex branch.
RepoSizes
repo UUID
size Integer
UniqueRepo repo
-- The last git-annex branch commit that was used to update RepoSizes.
AnnexBranch
commit SSha
UniqueCommit commit
|]
{- Opens the database, creating it if it doesn't exist yet.
-
- No locking is done by this, so caller must prevent multiple processes
- running this at the same time.
- Multiple readers and writers can have the database open at the same
- time. Database.Handle deals with the concurrency issues.
- The lock is held while opening the database, so that when
- the database doesn't exist yet, one caller wins the lock and
- can create it undisturbed.
-}
openDb :: Annex RepoSizeHandle
openDb = do
dbdir <- calcRepo' gitAnnexRepoSizeDbDir
let db = dbdir P.</> "db"
unlessM (liftIO $ R.doesPathExist db) $ do
initDb db $ void $
runMigrationSilent migrateRepoSizes
h <- liftIO $ H.openDbQueue db "reposizes"
return $ RepoSizeHandle h
lck <- calcRepo' gitAnnexRepoSizeDbLock
catchPermissionDenied permerr $ withExclusiveLock lck $ do
dbdir <- calcRepo' gitAnnexRepoSizeDbDir
let db = dbdir P.</> "db"
unlessM (liftIO $ R.doesPathExist db) $ do
initDb db $ void $
runMigrationSilent migrateRepoSizes
h <- liftIO $ H.openDbQueue db "repo_sizes"
return $ RepoSizeHandle (Just h)
where
-- If permissions don't allow opening the database,
-- just don't use it. Since this database is just a cache
-- of information available in the git-annex branch, the same
-- information can be queried from the branch, though much less
-- efficiently.
permerr _e = return (RepoSizeHandle Nothing)
closeDb :: RepoSizeHandle -> Annex ()
closeDb (RepoSizeHandle h) = liftIO $ H.closeDbQueue h
closeDb (RepoSizeHandle (Just h)) = liftIO $ H.closeDbQueue h
closeDb (RepoSizeHandle Nothing) = noop
{- Doesn't see changes that were just made with setRepoSize or
- updateRepoSize before flushing the queue. -}
getRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize)
getRepoSizes (RepoSizeHandle h) = H.queryDbQueue h $
M.fromList . map conv <$> getRepoSizes'
getRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize, Maybe Sha)
getRepoSizes (RepoSizeHandle (Just h)) = H.queryDbQueue h $ do
sizemap <- M.fromList . map conv <$> getRepoSizes'
annexbranchsha <- getAnnexBranchCommit
return (sizemap, annexbranchsha)
where
conv entity =
let RepoSizes u sz = entityVal entity
in (u, RepoSize sz)
getRepoSizes (RepoSizeHandle Nothing) = return (mempty, Nothing)
getRepoSizes' :: SqlPersistM [Entity RepoSizes]
getRepoSizes' = selectList [] []
setRepoSize :: UUID -> RepoSize -> RepoSizeHandle -> IO ()
setRepoSize u (RepoSize sz) (RepoSizeHandle h) = H.queueDb h checkCommit $
getAnnexBranchCommit :: SqlPersistM (Maybe Sha)
getAnnexBranchCommit = do
l <- selectList ([] :: [Filter AnnexBranch]) []
case l of
(s:[]) -> return $ Just $ fromSSha $
annexBranchCommit $ entityVal s
_ -> return Nothing
{- Updates the recorded sizes of all repositories.
-
- This can be called without locking since the update runs in a single
- transaction.
-
- Any repositories that are not in the provided map, but do have a size
- recorded in the database will have it cleared. This is unlikely to
- happen, but ensures that the database is consistent.
-}
setRepoSizes :: RepoSizeHandle -> M.Map UUID RepoSize -> Sha -> IO ()
setRepoSizes (RepoSizeHandle (Just h)) sizemap branchcommitsha =
H.queueDb h commitimmediately $ do
l <- getRepoSizes'
forM_ (map entityVal l) $ \(RepoSizes u _) ->
unless (M.member u sizemap) $
unsetRepoSize u
forM_ (M.toList sizemap) $
uncurry setRepoSize
recordAnnexBranchCommit branchcommitsha
where
commitimmediately _ _ = pure True
setRepoSizes (RepoSizeHandle Nothing) _ _ = noop
setRepoSize :: UUID -> RepoSize -> SqlPersistM ()
setRepoSize u (RepoSize sz) =
void $ upsertBy
(UniqueRepo u)
(RepoSizes u sz)
[RepoSizesSize =. sz]
{- Applies an offset to the size. If no size is recorded for the repo, does
- nothing. -}
updateRepoSize :: UUID -> Integer -> RepoSizeHandle -> IO ()
updateRepoSize u offset (RepoSizeHandle h) = H.queueDb h checkCommit $
void $ updateWhere
[RepoSizesRepo ==. u]
[RepoSizesSize +=. offset]
unsetRepoSize :: UUID -> SqlPersistM ()
unsetRepoSize u = deleteWhere [RepoSizesRepo ==. u]
checkCommit :: H.QueueSize -> H.LastCommitTime -> IO Bool
checkCommit sz _lastcommittime
| sz > 1000 = return True
| otherwise = return False
recordAnnexBranchCommit :: Sha -> SqlPersistM ()
recordAnnexBranchCommit branchcommitsha = do
deleteWhere ([] :: [Filter AnnexBranch])
void $ insertUniqueFast $ AnnexBranch $ toSSha branchcommitsha

View file

@ -55,8 +55,6 @@ Planned schedule of work:
* Goal is for limitFullyBalanced not to need to calcRepoSizes.
* Add git-annex branch sha to Database.RepoSizes.
* When Annex.reposizes does not list the size of a UUID, and
that UUID's size is needed eg for balanced preferred
content, use calcRepoSizes and store in