finalize RepoSize database
Including locking on creation, handling of permissions errors, and setting repo sizes. I'm confident that locking is not needed while using this database. Since writes happen in a single transaction. When there are two writers that are recording sizes based on different git-annex branch commits, one will overwrite what the other one recorded. Which is fine, it's only necessary that the database stays consistent with the content of a git-annex branch commit.
This commit is contained in:
parent
63a3cedc45
commit
eac4e9391b
3 changed files with 86 additions and 37 deletions
|
@ -76,6 +76,7 @@ module Annex.Locations (
|
||||||
gitAnnexImportFeedDbDir,
|
gitAnnexImportFeedDbDir,
|
||||||
gitAnnexImportFeedDbLock,
|
gitAnnexImportFeedDbLock,
|
||||||
gitAnnexRepoSizeDbDir,
|
gitAnnexRepoSizeDbDir,
|
||||||
|
gitAnnexRepoSizeDbLock,
|
||||||
gitAnnexScheduleState,
|
gitAnnexScheduleState,
|
||||||
gitAnnexTransferDir,
|
gitAnnexTransferDir,
|
||||||
gitAnnexCredsDir,
|
gitAnnexCredsDir,
|
||||||
|
@ -521,6 +522,10 @@ gitAnnexRepoSizeDbDir :: Git.Repo -> GitConfig -> RawFilePath
|
||||||
gitAnnexRepoSizeDbDir r c =
|
gitAnnexRepoSizeDbDir r c =
|
||||||
fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "reposize"
|
fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "reposize"
|
||||||
|
|
||||||
|
{- Lock file for the reposize database. -}
|
||||||
|
gitAnnexRepoSizeDbLock :: Git.Repo -> GitConfig -> RawFilePath
|
||||||
|
gitAnnexRepoSizeDbLock r c = gitAnnexRepoSizeDbDir r c <> ".lck"
|
||||||
|
|
||||||
{- .git/annex/schedulestate is used to store information about when
|
{- .git/annex/schedulestate is used to store information about when
|
||||||
- scheduled jobs were last run. -}
|
- scheduled jobs were last run. -}
|
||||||
gitAnnexScheduleState :: Git.Repo -> RawFilePath
|
gitAnnexScheduleState :: Git.Repo -> RawFilePath
|
||||||
|
|
|
@ -22,17 +22,18 @@ module Database.RepoSize (
|
||||||
RepoSizeHandle,
|
RepoSizeHandle,
|
||||||
openDb,
|
openDb,
|
||||||
closeDb,
|
closeDb,
|
||||||
getRepoSizes,
|
getRepoSizes,
|
||||||
setRepoSize,
|
setRepoSizes,
|
||||||
updateRepoSize,
|
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
import Annex.Common
|
||||||
|
import Annex.LockFile
|
||||||
import Types.RepoSize
|
import Types.RepoSize
|
||||||
import Database.Types ()
|
import Git.Types
|
||||||
import qualified Database.Queue as H
|
import qualified Database.Queue as H
|
||||||
import Database.Init
|
import Database.Init
|
||||||
import Annex.Locations
|
import Database.Utility
|
||||||
import Annex.Common
|
import Database.Types
|
||||||
import qualified Utility.RawFilePath as R
|
import qualified Utility.RawFilePath as R
|
||||||
|
|
||||||
import Database.Persist.Sql hiding (Key)
|
import Database.Persist.Sql hiding (Key)
|
||||||
|
@ -40,62 +41,107 @@ import Database.Persist.TH
|
||||||
import qualified System.FilePath.ByteString as P
|
import qualified System.FilePath.ByteString as P
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
|
||||||
newtype RepoSizeHandle = RepoSizeHandle H.DbQueue
|
newtype RepoSizeHandle = RepoSizeHandle (Maybe H.DbQueue)
|
||||||
|
|
||||||
share [mkPersist sqlSettings, mkMigrate "migrateRepoSizes"] [persistLowerCase|
|
share [mkPersist sqlSettings, mkMigrate "migrateRepoSizes"] [persistLowerCase|
|
||||||
|
-- Corresponds to location log information from the git-annex branch.
|
||||||
RepoSizes
|
RepoSizes
|
||||||
repo UUID
|
repo UUID
|
||||||
size Integer
|
size Integer
|
||||||
UniqueRepo repo
|
UniqueRepo repo
|
||||||
|
-- The last git-annex branch commit that was used to update RepoSizes.
|
||||||
|
AnnexBranch
|
||||||
|
commit SSha
|
||||||
|
UniqueCommit commit
|
||||||
|]
|
|]
|
||||||
|
|
||||||
{- Opens the database, creating it if it doesn't exist yet.
|
{- Opens the database, creating it if it doesn't exist yet.
|
||||||
-
|
-
|
||||||
- No locking is done by this, so caller must prevent multiple processes
|
- Multiple readers and writers can have the database open at the same
|
||||||
- running this at the same time.
|
- time. Database.Handle deals with the concurrency issues.
|
||||||
|
- The lock is held while opening the database, so that when
|
||||||
|
- the database doesn't exist yet, one caller wins the lock and
|
||||||
|
- can create it undisturbed.
|
||||||
-}
|
-}
|
||||||
openDb :: Annex RepoSizeHandle
|
openDb :: Annex RepoSizeHandle
|
||||||
openDb = do
|
openDb = do
|
||||||
dbdir <- calcRepo' gitAnnexRepoSizeDbDir
|
lck <- calcRepo' gitAnnexRepoSizeDbLock
|
||||||
let db = dbdir P.</> "db"
|
catchPermissionDenied permerr $ withExclusiveLock lck $ do
|
||||||
unlessM (liftIO $ R.doesPathExist db) $ do
|
dbdir <- calcRepo' gitAnnexRepoSizeDbDir
|
||||||
initDb db $ void $
|
let db = dbdir P.</> "db"
|
||||||
runMigrationSilent migrateRepoSizes
|
unlessM (liftIO $ R.doesPathExist db) $ do
|
||||||
h <- liftIO $ H.openDbQueue db "reposizes"
|
initDb db $ void $
|
||||||
return $ RepoSizeHandle h
|
runMigrationSilent migrateRepoSizes
|
||||||
|
h <- liftIO $ H.openDbQueue db "repo_sizes"
|
||||||
|
return $ RepoSizeHandle (Just h)
|
||||||
|
where
|
||||||
|
-- If permissions don't allow opening the database,
|
||||||
|
-- just don't use it. Since this database is just a cache
|
||||||
|
-- of information available in the git-annex branch, the same
|
||||||
|
-- information can be queried from the branch, though much less
|
||||||
|
-- efficiently.
|
||||||
|
permerr _e = return (RepoSizeHandle Nothing)
|
||||||
|
|
||||||
closeDb :: RepoSizeHandle -> Annex ()
|
closeDb :: RepoSizeHandle -> Annex ()
|
||||||
closeDb (RepoSizeHandle h) = liftIO $ H.closeDbQueue h
|
closeDb (RepoSizeHandle (Just h)) = liftIO $ H.closeDbQueue h
|
||||||
|
closeDb (RepoSizeHandle Nothing) = noop
|
||||||
|
|
||||||
{- Doesn't see changes that were just made with setRepoSize or
|
getRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize, Maybe Sha)
|
||||||
- updateRepoSize before flushing the queue. -}
|
getRepoSizes (RepoSizeHandle (Just h)) = H.queryDbQueue h $ do
|
||||||
getRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize)
|
sizemap <- M.fromList . map conv <$> getRepoSizes'
|
||||||
getRepoSizes (RepoSizeHandle h) = H.queryDbQueue h $
|
annexbranchsha <- getAnnexBranchCommit
|
||||||
M.fromList . map conv <$> getRepoSizes'
|
return (sizemap, annexbranchsha)
|
||||||
where
|
where
|
||||||
conv entity =
|
conv entity =
|
||||||
let RepoSizes u sz = entityVal entity
|
let RepoSizes u sz = entityVal entity
|
||||||
in (u, RepoSize sz)
|
in (u, RepoSize sz)
|
||||||
|
getRepoSizes (RepoSizeHandle Nothing) = return (mempty, Nothing)
|
||||||
|
|
||||||
getRepoSizes' :: SqlPersistM [Entity RepoSizes]
|
getRepoSizes' :: SqlPersistM [Entity RepoSizes]
|
||||||
getRepoSizes' = selectList [] []
|
getRepoSizes' = selectList [] []
|
||||||
|
|
||||||
setRepoSize :: UUID -> RepoSize -> RepoSizeHandle -> IO ()
|
getAnnexBranchCommit :: SqlPersistM (Maybe Sha)
|
||||||
setRepoSize u (RepoSize sz) (RepoSizeHandle h) = H.queueDb h checkCommit $
|
getAnnexBranchCommit = do
|
||||||
|
l <- selectList ([] :: [Filter AnnexBranch]) []
|
||||||
|
case l of
|
||||||
|
(s:[]) -> return $ Just $ fromSSha $
|
||||||
|
annexBranchCommit $ entityVal s
|
||||||
|
_ -> return Nothing
|
||||||
|
|
||||||
|
{- Updates the recorded sizes of all repositories.
|
||||||
|
-
|
||||||
|
- This can be called without locking since the update runs in a single
|
||||||
|
- transaction.
|
||||||
|
-
|
||||||
|
- Any repositories that are not in the provided map, but do have a size
|
||||||
|
- recorded in the database will have it cleared. This is unlikely to
|
||||||
|
- happen, but ensures that the database is consistent.
|
||||||
|
-}
|
||||||
|
setRepoSizes :: RepoSizeHandle -> M.Map UUID RepoSize -> Sha -> IO ()
|
||||||
|
setRepoSizes (RepoSizeHandle (Just h)) sizemap branchcommitsha =
|
||||||
|
H.queueDb h commitimmediately $ do
|
||||||
|
l <- getRepoSizes'
|
||||||
|
forM_ (map entityVal l) $ \(RepoSizes u _) ->
|
||||||
|
unless (M.member u sizemap) $
|
||||||
|
unsetRepoSize u
|
||||||
|
forM_ (M.toList sizemap) $
|
||||||
|
uncurry setRepoSize
|
||||||
|
recordAnnexBranchCommit branchcommitsha
|
||||||
|
where
|
||||||
|
commitimmediately _ _ = pure True
|
||||||
|
setRepoSizes (RepoSizeHandle Nothing) _ _ = noop
|
||||||
|
|
||||||
|
setRepoSize :: UUID -> RepoSize -> SqlPersistM ()
|
||||||
|
setRepoSize u (RepoSize sz) =
|
||||||
void $ upsertBy
|
void $ upsertBy
|
||||||
(UniqueRepo u)
|
(UniqueRepo u)
|
||||||
(RepoSizes u sz)
|
(RepoSizes u sz)
|
||||||
[RepoSizesSize =. sz]
|
[RepoSizesSize =. sz]
|
||||||
|
|
||||||
{- Applies an offset to the size. If no size is recorded for the repo, does
|
unsetRepoSize :: UUID -> SqlPersistM ()
|
||||||
- nothing. -}
|
unsetRepoSize u = deleteWhere [RepoSizesRepo ==. u]
|
||||||
updateRepoSize :: UUID -> Integer -> RepoSizeHandle -> IO ()
|
|
||||||
updateRepoSize u offset (RepoSizeHandle h) = H.queueDb h checkCommit $
|
|
||||||
void $ updateWhere
|
|
||||||
[RepoSizesRepo ==. u]
|
|
||||||
[RepoSizesSize +=. offset]
|
|
||||||
|
|
||||||
checkCommit :: H.QueueSize -> H.LastCommitTime -> IO Bool
|
recordAnnexBranchCommit :: Sha -> SqlPersistM ()
|
||||||
checkCommit sz _lastcommittime
|
recordAnnexBranchCommit branchcommitsha = do
|
||||||
| sz > 1000 = return True
|
deleteWhere ([] :: [Filter AnnexBranch])
|
||||||
| otherwise = return False
|
void $ insertUniqueFast $ AnnexBranch $ toSSha branchcommitsha
|
||||||
|
|
|
@ -55,8 +55,6 @@ Planned schedule of work:
|
||||||
|
|
||||||
* Goal is for limitFullyBalanced not to need to calcRepoSizes.
|
* Goal is for limitFullyBalanced not to need to calcRepoSizes.
|
||||||
|
|
||||||
* Add git-annex branch sha to Database.RepoSizes.
|
|
||||||
|
|
||||||
* When Annex.reposizes does not list the size of a UUID, and
|
* When Annex.reposizes does not list the size of a UUID, and
|
||||||
that UUID's size is needed eg for balanced preferred
|
that UUID's size is needed eg for balanced preferred
|
||||||
content, use calcRepoSizes and store in
|
content, use calcRepoSizes and store in
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue