added reposize database
The idea is that upon a merge of the git-annex branch, or a commit to the git-annex branch, the reposize database will be updated. So it should always accurately reflect the location log sizes, but it will often be behind the actual current sizes. Annex.reposizes will start with the value from the database, and get updated with each transfer, so it will reflect a process's best understanding of the current sizes. When there are multiple processes all transferring to the same repo, Annex.reposize will not reflect transfers made by the other processes since the current process started. So when using balanced preferred content, it may make suboptimal choices, including trying to transfer content to the repo when another process has already filled it up. But this is the same as if there are multiple processes running on ifferent machines, so is acceptable. The reposize will eventually get an accurate value reflecting changes made by other processes or in other repos.
This commit is contained in:
parent
71043fe9f7
commit
99a126bebb
8 changed files with 130 additions and 14 deletions
4
Annex.hs
4
Annex.hs
|
@ -75,7 +75,7 @@ import Types.RemoteConfig
|
|||
import Types.TransferrerPool
|
||||
import Types.VectorClock
|
||||
import Types.Cluster
|
||||
import Types.MaxSize
|
||||
import Types.RepoSize
|
||||
import Annex.VectorClock.Utility
|
||||
import Annex.Debug.Utility
|
||||
import qualified Database.Keys.Handle as Keys
|
||||
|
@ -202,6 +202,7 @@ data AnnexState = AnnexState
|
|||
, remoteconfigmap :: Maybe (M.Map UUID RemoteConfig)
|
||||
, clusters :: Maybe (Annex Clusters)
|
||||
, maxsizes :: Maybe (M.Map UUID MaxSize)
|
||||
, reposizes :: Maybe (M.Map UUID RepoSize)
|
||||
, forcetrust :: TrustMap
|
||||
, trustmap :: Maybe TrustMap
|
||||
, groupmap :: Maybe GroupMap
|
||||
|
@ -257,6 +258,7 @@ newAnnexState c r = do
|
|||
, remoteconfigmap = Nothing
|
||||
, clusters = Nothing
|
||||
, maxsizes = Nothing
|
||||
, reposizes = Nothing
|
||||
, forcetrust = M.empty
|
||||
, trustmap = Nothing
|
||||
, groupmap = Nothing
|
||||
|
|
|
@ -75,6 +75,7 @@ module Annex.Locations (
|
|||
gitAnnexContentIdentifierLock,
|
||||
gitAnnexImportFeedDbDir,
|
||||
gitAnnexImportFeedDbLock,
|
||||
gitAnnexRepoSizeDbDir,
|
||||
gitAnnexScheduleState,
|
||||
gitAnnexTransferDir,
|
||||
gitAnnexCredsDir,
|
||||
|
@ -515,6 +516,11 @@ gitAnnexImportFeedDbDir r c =
|
|||
gitAnnexImportFeedDbLock :: Git.Repo -> GitConfig -> RawFilePath
|
||||
gitAnnexImportFeedDbLock r c = gitAnnexImportFeedDbDir r c <> ".lck"
|
||||
|
||||
{- Directory containing reposize database. -}
|
||||
gitAnnexRepoSizeDbDir :: Git.Repo -> GitConfig -> RawFilePath
|
||||
gitAnnexRepoSizeDbDir r c =
|
||||
fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "reposize"
|
||||
|
||||
{- .git/annex/schedulestate is used to store information about when
|
||||
- scheduled jobs were last run. -}
|
||||
gitAnnexScheduleState :: Git.Repo -> RawFilePath
|
||||
|
|
|
@ -14,6 +14,7 @@ module Database.Queue (
|
|||
closeDbQueue,
|
||||
flushDbQueue,
|
||||
QueueSize,
|
||||
LastCommitTime,
|
||||
queueDb,
|
||||
) where
|
||||
|
||||
|
|
101
Database/RepoSize.hs
Normal file
101
Database/RepoSize.hs
Normal file
|
@ -0,0 +1,101 @@
|
|||
{- Sqlite database used to track the sizes of repositories.
|
||||
-
|
||||
- Copyright 2024 Joey Hess <id@joeyh.name>
|
||||
-:
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
|
||||
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
|
||||
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
|
||||
{-# LANGUAGE DataKinds, FlexibleInstances #-}
|
||||
{-# LANGUAGE RankNTypes #-}
|
||||
{-# LANGUAGE UndecidableInstances #-}
|
||||
{-# LANGUAGE TypeOperators #-}
|
||||
#if MIN_VERSION_persistent_template(2,8,0)
|
||||
{-# LANGUAGE DerivingStrategies #-}
|
||||
{-# LANGUAGE StandaloneDeriving #-}
|
||||
#endif
|
||||
|
||||
module Database.RepoSize (
|
||||
RepoSizeHandle,
|
||||
openDb,
|
||||
closeDb,
|
||||
getRepoSizes,
|
||||
setRepoSize,
|
||||
updateRepoSize,
|
||||
) where
|
||||
|
||||
import Types.RepoSize
|
||||
import Database.Types ()
|
||||
import qualified Database.Queue as H
|
||||
import Database.Init
|
||||
import Annex.Locations
|
||||
import Annex.Common
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
import Database.Persist.Sql hiding (Key)
|
||||
import Database.Persist.TH
|
||||
import qualified System.FilePath.ByteString as P
|
||||
import qualified Data.Map as M
|
||||
|
||||
newtype RepoSizeHandle = RepoSizeHandle H.DbQueue
|
||||
|
||||
share [mkPersist sqlSettings, mkMigrate "migrateRepoSizes"] [persistLowerCase|
|
||||
RepoSizes
|
||||
repo UUID
|
||||
size Integer
|
||||
UniqueRepo repo
|
||||
|]
|
||||
|
||||
{- Opens the database, creating it if it doesn't exist yet.
|
||||
-
|
||||
- No locking is done by this, so caller must prevent multiple processes
|
||||
- running this at the same time.
|
||||
-}
|
||||
openDb :: Annex RepoSizeHandle
|
||||
openDb = do
|
||||
dbdir <- calcRepo' gitAnnexRepoSizeDbDir
|
||||
let db = dbdir P.</> "db"
|
||||
unlessM (liftIO $ R.doesPathExist db) $ do
|
||||
initDb db $ void $
|
||||
runMigrationSilent migrateRepoSizes
|
||||
h <- liftIO $ H.openDbQueue db "reposizes"
|
||||
return $ RepoSizeHandle h
|
||||
|
||||
closeDb :: RepoSizeHandle -> Annex ()
|
||||
closeDb (RepoSizeHandle h) = liftIO $ H.closeDbQueue h
|
||||
|
||||
{- Doesn't see changes that were just made with setRepoSize or
|
||||
- updateRepoSize before flushing the queue. -}
|
||||
getRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize)
|
||||
getRepoSizes (RepoSizeHandle h) = H.queryDbQueue h $
|
||||
M.fromList . map conv <$> getRepoSizes'
|
||||
where
|
||||
conv entity =
|
||||
let RepoSizes u sz = entityVal entity
|
||||
in (u, RepoSize sz)
|
||||
|
||||
getRepoSizes' :: SqlPersistM [Entity RepoSizes]
|
||||
getRepoSizes' = selectList [] []
|
||||
|
||||
setRepoSize :: UUID -> RepoSize -> RepoSizeHandle -> IO ()
|
||||
setRepoSize u (RepoSize sz) (RepoSizeHandle h) = H.queueDb h checkCommit $
|
||||
void $ upsertBy
|
||||
(UniqueRepo u)
|
||||
(RepoSizes u sz)
|
||||
[RepoSizesSize =. sz]
|
||||
|
||||
{- Applies an offset to the size. If no size is recorded for the repo, does
|
||||
- nothing. -}
|
||||
updateRepoSize :: UUID -> Integer -> RepoSizeHandle -> IO ()
|
||||
updateRepoSize u offset (RepoSizeHandle h) = H.queueDb h checkCommit $
|
||||
void $ updateWhere
|
||||
[RepoSizesRepo ==. u]
|
||||
[RepoSizesSize +=. offset]
|
||||
|
||||
checkCommit :: H.QueueSize -> H.LastCommitTime -> IO Bool
|
||||
checkCommit sz _lastcommittime
|
||||
| sz > 1000 = return True
|
||||
| otherwise = return False
|
|
@ -13,7 +13,7 @@ module Logs.MaxSize (
|
|||
|
||||
import qualified Annex
|
||||
import Annex.Common
|
||||
import Types.MaxSize
|
||||
import Types.RepoSize
|
||||
import Logs
|
||||
import Logs.UUIDBased
|
||||
import Logs.MapLog
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
{- git-annex maxsize type
|
||||
-
|
||||
- Copyright 2024 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Types.MaxSize where
|
||||
|
||||
newtype MaxSize = MaxSize Integer
|
||||
deriving (Show, Eq, Ord)
|
16
Types/RepoSize.hs
Normal file
16
Types/RepoSize.hs
Normal file
|
@ -0,0 +1,16 @@
|
|||
{- git-annex repo sizes types
|
||||
-
|
||||
- Copyright 2024 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Types.RepoSize where
|
||||
|
||||
-- The current size of a repo.
|
||||
newtype RepoSize = RepoSize Integer
|
||||
deriving (Show, Eq, Ord)
|
||||
|
||||
-- The maximum size of a repo.
|
||||
newtype MaxSize = MaxSize Integer
|
||||
deriving (Show, Eq, Ord)
|
|
@ -783,6 +783,7 @@ Executable git-annex
|
|||
Database.Keys.SQL
|
||||
Database.Queue
|
||||
Database.RawFilePath
|
||||
Database.RepoSize
|
||||
Database.Types
|
||||
Database.Utility
|
||||
Git
|
||||
|
@ -988,7 +989,6 @@ Executable git-annex
|
|||
Types.KeySource
|
||||
Types.Link
|
||||
Types.LockCache
|
||||
Types.MaxSize
|
||||
Types.Messages
|
||||
Types.MetaData
|
||||
Types.Mime
|
||||
|
@ -998,6 +998,7 @@ Executable git-annex
|
|||
Types.Remote
|
||||
Types.RemoteConfig
|
||||
Types.RemoteState
|
||||
Types.RepoSize
|
||||
Types.RepoVersion
|
||||
Types.ScheduledActivity
|
||||
Types.StandardGroups
|
||||
|
|
Loading…
Reference in a new issue