finalize RepoSize database
Including locking on creation, handling of permissions errors, and setting repo sizes. I'm confident that locking is not needed while using this database. Since writes happen in a single transaction. When there are two writers that are recording sizes based on different git-annex branch commits, one will overwrite what the other one recorded. Which is fine, it's only necessary that the database stays consistent with the content of a git-annex branch commit.
This commit is contained in:
parent
63a3cedc45
commit
eac4e9391b
3 changed files with 86 additions and 37 deletions
|
@ -76,6 +76,7 @@ module Annex.Locations (
|
|||
gitAnnexImportFeedDbDir,
|
||||
gitAnnexImportFeedDbLock,
|
||||
gitAnnexRepoSizeDbDir,
|
||||
gitAnnexRepoSizeDbLock,
|
||||
gitAnnexScheduleState,
|
||||
gitAnnexTransferDir,
|
||||
gitAnnexCredsDir,
|
||||
|
@ -521,6 +522,10 @@ gitAnnexRepoSizeDbDir :: Git.Repo -> GitConfig -> RawFilePath
|
|||
gitAnnexRepoSizeDbDir r c =
|
||||
fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "reposize"
|
||||
|
||||
{- Lock file for the reposize database. -}
|
||||
gitAnnexRepoSizeDbLock :: Git.Repo -> GitConfig -> RawFilePath
|
||||
gitAnnexRepoSizeDbLock r c = gitAnnexRepoSizeDbDir r c <> ".lck"
|
||||
|
||||
{- .git/annex/schedulestate is used to store information about when
|
||||
- scheduled jobs were last run. -}
|
||||
gitAnnexScheduleState :: Git.Repo -> RawFilePath
|
||||
|
|
|
@ -22,17 +22,18 @@ module Database.RepoSize (
|
|||
RepoSizeHandle,
|
||||
openDb,
|
||||
closeDb,
|
||||
getRepoSizes,
|
||||
setRepoSize,
|
||||
updateRepoSize,
|
||||
getRepoSizes,
|
||||
setRepoSizes,
|
||||
) where
|
||||
|
||||
import Annex.Common
|
||||
import Annex.LockFile
|
||||
import Types.RepoSize
|
||||
import Database.Types ()
|
||||
import Git.Types
|
||||
import qualified Database.Queue as H
|
||||
import Database.Init
|
||||
import Annex.Locations
|
||||
import Annex.Common
|
||||
import Database.Utility
|
||||
import Database.Types
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
import Database.Persist.Sql hiding (Key)
|
||||
|
@ -40,62 +41,107 @@ import Database.Persist.TH
|
|||
import qualified System.FilePath.ByteString as P
|
||||
import qualified Data.Map as M
|
||||
|
||||
newtype RepoSizeHandle = RepoSizeHandle H.DbQueue
|
||||
newtype RepoSizeHandle = RepoSizeHandle (Maybe H.DbQueue)
|
||||
|
||||
share [mkPersist sqlSettings, mkMigrate "migrateRepoSizes"] [persistLowerCase|
|
||||
-- Corresponds to location log information from the git-annex branch.
|
||||
RepoSizes
|
||||
repo UUID
|
||||
size Integer
|
||||
UniqueRepo repo
|
||||
-- The last git-annex branch commit that was used to update RepoSizes.
|
||||
AnnexBranch
|
||||
commit SSha
|
||||
UniqueCommit commit
|
||||
|]
|
||||
|
||||
{- Opens the database, creating it if it doesn't exist yet.
|
||||
-
|
||||
- No locking is done by this, so caller must prevent multiple processes
|
||||
- running this at the same time.
|
||||
- Multiple readers and writers can have the database open at the same
|
||||
- time. Database.Handle deals with the concurrency issues.
|
||||
- The lock is held while opening the database, so that when
|
||||
- the database doesn't exist yet, one caller wins the lock and
|
||||
- can create it undisturbed.
|
||||
-}
|
||||
openDb :: Annex RepoSizeHandle
|
||||
openDb = do
|
||||
dbdir <- calcRepo' gitAnnexRepoSizeDbDir
|
||||
let db = dbdir P.</> "db"
|
||||
unlessM (liftIO $ R.doesPathExist db) $ do
|
||||
initDb db $ void $
|
||||
runMigrationSilent migrateRepoSizes
|
||||
h <- liftIO $ H.openDbQueue db "reposizes"
|
||||
return $ RepoSizeHandle h
|
||||
lck <- calcRepo' gitAnnexRepoSizeDbLock
|
||||
catchPermissionDenied permerr $ withExclusiveLock lck $ do
|
||||
dbdir <- calcRepo' gitAnnexRepoSizeDbDir
|
||||
let db = dbdir P.</> "db"
|
||||
unlessM (liftIO $ R.doesPathExist db) $ do
|
||||
initDb db $ void $
|
||||
runMigrationSilent migrateRepoSizes
|
||||
h <- liftIO $ H.openDbQueue db "repo_sizes"
|
||||
return $ RepoSizeHandle (Just h)
|
||||
where
|
||||
-- If permissions don't allow opening the database,
|
||||
-- just don't use it. Since this database is just a cache
|
||||
-- of information available in the git-annex branch, the same
|
||||
-- information can be queried from the branch, though much less
|
||||
-- efficiently.
|
||||
permerr _e = return (RepoSizeHandle Nothing)
|
||||
|
||||
closeDb :: RepoSizeHandle -> Annex ()
|
||||
closeDb (RepoSizeHandle h) = liftIO $ H.closeDbQueue h
|
||||
closeDb (RepoSizeHandle (Just h)) = liftIO $ H.closeDbQueue h
|
||||
closeDb (RepoSizeHandle Nothing) = noop
|
||||
|
||||
{- Doesn't see changes that were just made with setRepoSize or
|
||||
- updateRepoSize before flushing the queue. -}
|
||||
getRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize)
|
||||
getRepoSizes (RepoSizeHandle h) = H.queryDbQueue h $
|
||||
M.fromList . map conv <$> getRepoSizes'
|
||||
getRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize, Maybe Sha)
|
||||
getRepoSizes (RepoSizeHandle (Just h)) = H.queryDbQueue h $ do
|
||||
sizemap <- M.fromList . map conv <$> getRepoSizes'
|
||||
annexbranchsha <- getAnnexBranchCommit
|
||||
return (sizemap, annexbranchsha)
|
||||
where
|
||||
conv entity =
|
||||
let RepoSizes u sz = entityVal entity
|
||||
in (u, RepoSize sz)
|
||||
getRepoSizes (RepoSizeHandle Nothing) = return (mempty, Nothing)
|
||||
|
||||
getRepoSizes' :: SqlPersistM [Entity RepoSizes]
|
||||
getRepoSizes' = selectList [] []
|
||||
|
||||
setRepoSize :: UUID -> RepoSize -> RepoSizeHandle -> IO ()
|
||||
setRepoSize u (RepoSize sz) (RepoSizeHandle h) = H.queueDb h checkCommit $
|
||||
getAnnexBranchCommit :: SqlPersistM (Maybe Sha)
|
||||
getAnnexBranchCommit = do
|
||||
l <- selectList ([] :: [Filter AnnexBranch]) []
|
||||
case l of
|
||||
(s:[]) -> return $ Just $ fromSSha $
|
||||
annexBranchCommit $ entityVal s
|
||||
_ -> return Nothing
|
||||
|
||||
{- Updates the recorded sizes of all repositories.
|
||||
-
|
||||
- This can be called without locking since the update runs in a single
|
||||
- transaction.
|
||||
-
|
||||
- Any repositories that are not in the provided map, but do have a size
|
||||
- recorded in the database will have it cleared. This is unlikely to
|
||||
- happen, but ensures that the database is consistent.
|
||||
-}
|
||||
setRepoSizes :: RepoSizeHandle -> M.Map UUID RepoSize -> Sha -> IO ()
|
||||
setRepoSizes (RepoSizeHandle (Just h)) sizemap branchcommitsha =
|
||||
H.queueDb h commitimmediately $ do
|
||||
l <- getRepoSizes'
|
||||
forM_ (map entityVal l) $ \(RepoSizes u _) ->
|
||||
unless (M.member u sizemap) $
|
||||
unsetRepoSize u
|
||||
forM_ (M.toList sizemap) $
|
||||
uncurry setRepoSize
|
||||
recordAnnexBranchCommit branchcommitsha
|
||||
where
|
||||
commitimmediately _ _ = pure True
|
||||
setRepoSizes (RepoSizeHandle Nothing) _ _ = noop
|
||||
|
||||
setRepoSize :: UUID -> RepoSize -> SqlPersistM ()
|
||||
setRepoSize u (RepoSize sz) =
|
||||
void $ upsertBy
|
||||
(UniqueRepo u)
|
||||
(RepoSizes u sz)
|
||||
[RepoSizesSize =. sz]
|
||||
|
||||
{- Applies an offset to the size. If no size is recorded for the repo, does
|
||||
- nothing. -}
|
||||
updateRepoSize :: UUID -> Integer -> RepoSizeHandle -> IO ()
|
||||
updateRepoSize u offset (RepoSizeHandle h) = H.queueDb h checkCommit $
|
||||
void $ updateWhere
|
||||
[RepoSizesRepo ==. u]
|
||||
[RepoSizesSize +=. offset]
|
||||
unsetRepoSize :: UUID -> SqlPersistM ()
|
||||
unsetRepoSize u = deleteWhere [RepoSizesRepo ==. u]
|
||||
|
||||
checkCommit :: H.QueueSize -> H.LastCommitTime -> IO Bool
|
||||
checkCommit sz _lastcommittime
|
||||
| sz > 1000 = return True
|
||||
| otherwise = return False
|
||||
recordAnnexBranchCommit :: Sha -> SqlPersistM ()
|
||||
recordAnnexBranchCommit branchcommitsha = do
|
||||
deleteWhere ([] :: [Filter AnnexBranch])
|
||||
void $ insertUniqueFast $ AnnexBranch $ toSSha branchcommitsha
|
||||
|
|
|
@ -55,8 +55,6 @@ Planned schedule of work:
|
|||
|
||||
* Goal is for limitFullyBalanced not to need to calcRepoSizes.
|
||||
|
||||
* Add git-annex branch sha to Database.RepoSizes.
|
||||
|
||||
* When Annex.reposizes does not list the size of a UUID, and
|
||||
that UUID's size is needed eg for balanced preferred
|
||||
content, use calcRepoSizes and store in
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue