calcBranchRepoSizes without journal files
This will be used to prime the RepoSizes database, which will always contain values that correpond to information in the git-annex branch, so without anything from journal files. Factored out overJournalFileContents which will later be used to update Annex.reposizes to include information from journal files. This will be partitcularly important to support private UUIDs which only ever get to journal files and not to the branch.
This commit is contained in:
parent
90a79a6c1e
commit
8ac2685b33
8 changed files with 103 additions and 61 deletions
107
Annex/Branch.hs
107
Annex/Branch.hs
|
@ -38,6 +38,7 @@ module Annex.Branch (
|
|||
precache,
|
||||
UnmergedBranches(..),
|
||||
overBranchFileContents,
|
||||
overJournalFileContents,
|
||||
updatedFromTree,
|
||||
) where
|
||||
|
||||
|
@ -123,7 +124,7 @@ create :: Annex ()
|
|||
create = void getBranch
|
||||
|
||||
{- Returns the sha of the branch, creating it first if necessary. -}
|
||||
getBranch :: Annex Git.Ref
|
||||
getBranch :: Annex Git.Sha
|
||||
getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha
|
||||
where
|
||||
go True = do
|
||||
|
@ -706,8 +707,8 @@ needUpdateIndex branchref = do
|
|||
return (committedref /= branchref)
|
||||
|
||||
{- Record that the branch's index has been updated to correspond to a
|
||||
- given ref of the branch. -}
|
||||
setIndexSha :: Git.Ref -> Annex ()
|
||||
- given sha of the branch. -}
|
||||
setIndexSha :: Git.Sha -> Annex ()
|
||||
setIndexSha ref = do
|
||||
f <- fromRepo gitAnnexIndexStatus
|
||||
writeLogFile f $ fromRef ref ++ "\n"
|
||||
|
@ -994,35 +995,45 @@ data UnmergedBranches t
|
|||
- The action is passed a callback that it can repeatedly call to read
|
||||
- the next file and its contents. When there are no more files, the
|
||||
- callback will return Nothing.
|
||||
-
|
||||
- Returns the accumulated result of the callback, as well as the sha of
|
||||
- the branch at the point it was read.
|
||||
-}
|
||||
overBranchFileContents
|
||||
:: (RawFilePath -> Maybe v)
|
||||
-> Bool
|
||||
-- ^ When there are new files in the journal that have not yet
|
||||
-- been committed to the branch, should those files be omitted?
|
||||
-- When this is False, the callback is run on each journalled file
|
||||
-- at the end, and so may be run more than once on the same file.
|
||||
:: Bool
|
||||
-- ^ Should files in the journal be ignored? When False,
|
||||
-- the content of journalled files is combined with files in the
|
||||
-- git-annex branch. And also, at the end, the callback is run
|
||||
-- on each journalled file, in case some journalled files are new
|
||||
-- files that do not yet appear in the branch. Note that this means
|
||||
-- the callback can be run more than once on the same filename,
|
||||
-- and in this case it's also possible for the callback to be
|
||||
-- passed some of the same file content repeatedly.
|
||||
-> (RawFilePath -> Maybe v)
|
||||
-> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a)
|
||||
-> Annex (UnmergedBranches a)
|
||||
overBranchFileContents select omitnewjournalledfiles go = do
|
||||
-> Annex (UnmergedBranches (a, Git.Sha))
|
||||
overBranchFileContents ignorejournal select go = do
|
||||
st <- update
|
||||
v <- overBranchFileContents' select omitnewjournalledfiles go st
|
||||
let st' = if ignorejournal
|
||||
then st { journalIgnorable = True }
|
||||
else st
|
||||
v <- overBranchFileContents' select go st'
|
||||
return $ if not (null (unmergedRefs st))
|
||||
then UnmergedBranches v
|
||||
else NoUnmergedBranches v
|
||||
|
||||
overBranchFileContents'
|
||||
:: (RawFilePath -> Maybe v)
|
||||
-> Bool
|
||||
-> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a)
|
||||
-> BranchState
|
||||
-> Annex a
|
||||
overBranchFileContents' select omitnewjournalledfiles go st = do
|
||||
-> Annex (a, Git.Sha)
|
||||
overBranchFileContents' select go st = do
|
||||
g <- Annex.gitRepo
|
||||
branchsha <- getBranch
|
||||
(l, cleanup) <- inRepo $ Git.LsTree.lsTree
|
||||
Git.LsTree.LsTreeRecursive
|
||||
(Git.LsTree.LsTreeLong False)
|
||||
fullname
|
||||
branchsha
|
||||
let select' f = fmap (\v -> (v, f)) (select f)
|
||||
buf <- liftIO newEmptyMVar
|
||||
let go' reader = go $ liftIO reader >>= \case
|
||||
|
@ -1030,24 +1041,12 @@ overBranchFileContents' select omitnewjournalledfiles go st = do
|
|||
content' <- checkjournal f content
|
||||
return (Just (v, f, content'))
|
||||
Nothing
|
||||
| journalIgnorable st || omitnewjournalledfiles ->
|
||||
return Nothing
|
||||
-- The journal did not get committed to the
|
||||
-- branch, and may contain new files that
|
||||
-- are not present in the branch, which
|
||||
-- need to be provided to the action still.
|
||||
-- This can cause the action to be run a
|
||||
-- second time with a file it already ran on.
|
||||
| otherwise -> liftIO (tryTakeMVar buf) >>= \case
|
||||
Nothing -> do
|
||||
jfs <- journalledFiles
|
||||
pjfs <- journalledFilesPrivate
|
||||
drain buf jfs pjfs
|
||||
Just (jfs, pjfs) -> drain buf jfs pjfs
|
||||
catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go'
|
||||
| journalIgnorable st -> return Nothing
|
||||
| otherwise -> overJournalFileContents' buf (handlestale branchsha) select
|
||||
res <- catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go'
|
||||
`finally` liftIO (void cleanup)
|
||||
return (res, branchsha)
|
||||
where
|
||||
-- Check the journal, in case it did not get committed to the branch
|
||||
checkjournal f branchcontent
|
||||
| journalIgnorable st = return branchcontent
|
||||
| otherwise = getJournalFileStale (GetPrivate True) f >>= return . \case
|
||||
|
@ -1057,19 +1056,49 @@ overBranchFileContents' select omitnewjournalledfiles go st = do
|
|||
PossiblyStaleJournalledContent journalledcontent ->
|
||||
Just (fromMaybe mempty branchcontent <> journalledcontent)
|
||||
|
||||
drain buf fs pfs = case getnext fs pfs of
|
||||
handlestale branchsha f journalledcontent = do
|
||||
-- This is expensive, but happens only when there is a
|
||||
-- private journal file.
|
||||
content <- getRef branchsha f
|
||||
return (content <> journalledcontent)
|
||||
|
||||
{- Like overBranchFileContents but only reads the content of journalled
|
||||
- files. Note that when there are private UUIDs, the journal files may
|
||||
- only include information about the private UUID, while information about
|
||||
- other UUIDs has been committed to the git-annex branch.
|
||||
-}
|
||||
overJournalFileContents
|
||||
:: (RawFilePath -> Maybe v)
|
||||
-> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a)
|
||||
-> Annex a
|
||||
overJournalFileContents select go = do
|
||||
buf <- liftIO newEmptyMVar
|
||||
go $ overJournalFileContents' buf handlestale select
|
||||
where
|
||||
handlestale _f journalledcontent = return journalledcontent
|
||||
|
||||
overJournalFileContents'
|
||||
:: MVar ([RawFilePath], [RawFilePath])
|
||||
-> (RawFilePath -> L.ByteString -> Annex L.ByteString)
|
||||
-> (RawFilePath -> Maybe a)
|
||||
-> Annex (Maybe (a, RawFilePath, Maybe L.ByteString))
|
||||
overJournalFileContents' buf handlestale select =
|
||||
liftIO (tryTakeMVar buf) >>= \case
|
||||
Nothing -> do
|
||||
jfs <- journalledFiles
|
||||
pjfs <- journalledFilesPrivate
|
||||
drain jfs pjfs
|
||||
Just (jfs, pjfs) -> drain jfs pjfs
|
||||
where
|
||||
drain fs pfs = case getnext fs pfs of
|
||||
Just (v, f, fs', pfs') -> do
|
||||
liftIO $ putMVar buf (fs', pfs')
|
||||
content <- getJournalFileStale (GetPrivate True) f >>= \case
|
||||
NoJournalledContent -> return Nothing
|
||||
JournalledContent journalledcontent ->
|
||||
return (Just journalledcontent)
|
||||
PossiblyStaleJournalledContent journalledcontent -> do
|
||||
-- This is expensive, but happens
|
||||
-- only when there is a private
|
||||
-- journal file.
|
||||
content <- getRef fullname f
|
||||
return (Just (content <> journalledcontent))
|
||||
PossiblyStaleJournalledContent journalledcontent ->
|
||||
Just <$> handlestale f journalledcontent
|
||||
return (Just (v, f, content))
|
||||
Nothing -> do
|
||||
liftIO $ putMVar buf ([], [])
|
||||
|
|
|
@ -592,7 +592,7 @@ gitAnnexIndex r = gitAnnexDir r P.</> "index"
|
|||
gitAnnexPrivateIndex :: Git.Repo -> RawFilePath
|
||||
gitAnnexPrivateIndex r = gitAnnexDir r P.</> "index-private"
|
||||
|
||||
{- Holds the ref of the git-annex branch that the index was last updated to.
|
||||
{- Holds the sha of the git-annex branch that the index was last updated to.
|
||||
-
|
||||
- The .lck in the name is a historical accident; this is not used as a
|
||||
- lock. -}
|
||||
|
|
|
@ -12,26 +12,33 @@ import Annex.Branch (UnmergedBranches(..))
|
|||
import Types.RepoSize
|
||||
import Logs.Location
|
||||
import Logs.UUID
|
||||
import Git.Types (Sha)
|
||||
|
||||
import qualified Data.Map.Strict as M
|
||||
|
||||
{- Sum up the sizes of all keys in all repositories, from the information
|
||||
- in the git-annex branch. New keys that only appear in the journal are
|
||||
- not included. Can be slow.
|
||||
- in the git-annex branch, but not the journal. Retuns the sha of the
|
||||
- branch commit that was used.
|
||||
-
|
||||
- The map includes the UUIDs of all known repositories, including
|
||||
- repositories that are empty.
|
||||
-}
|
||||
calcRepoSizes :: Annex (M.Map UUID RepoSize)
|
||||
calcRepoSizes = do
|
||||
calcBranchRepoSizes :: Annex (M.Map UUID RepoSize, Sha)
|
||||
calcBranchRepoSizes = do
|
||||
knownuuids <- M.keys <$> uuidDescMap
|
||||
let startmap = M.fromList $ map (\u -> (u, RepoSize 0)) knownuuids
|
||||
overLocationLogs True startmap accum >>= \case
|
||||
UnmergedBranches m -> return m
|
||||
NoUnmergedBranches m -> return m
|
||||
UnmergedBranches v -> return v
|
||||
NoUnmergedBranches v -> return v
|
||||
where
|
||||
addksz ksz (Just (RepoSize sz)) = Just $ RepoSize $ sz + ksz
|
||||
addksz ksz Nothing = Just $ RepoSize ksz
|
||||
accum k locs m = return $
|
||||
let sz = fromMaybe 0 $ fromKey keySize k
|
||||
in foldl' (flip $ M.alter $ addksz sz) m locs
|
||||
|
||||
{- Given the RepoSizes calculated from the git-annex branch, updates it with
|
||||
- data from journalled location logs.
|
||||
-}
|
||||
journalledRepoSizes :: M.Map UUID RepoSize -> Sha -> Annex (M.Map UUID RepoSize)
|
||||
journalledRepoSizes m branchsha = undefined --- XXX
|
||||
|
|
|
@ -288,8 +288,8 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
|
|||
(\reader cont -> checktimelimit (discard reader) cont)
|
||||
(\k _ () -> keyaction Nothing (SeekInput [], k, mkActionItem k))
|
||||
>>= \case
|
||||
Annex.Branch.NoUnmergedBranches () -> return ()
|
||||
Annex.Branch.UnmergedBranches () -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
|
||||
Annex.Branch.NoUnmergedBranches ((), _) -> return ()
|
||||
Annex.Branch.UnmergedBranches ((), _) -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
|
||||
|
||||
runkeyaction getks = do
|
||||
keyaction <- mkkeyaction
|
||||
|
|
|
@ -652,7 +652,7 @@ cachedAllRepoData = do
|
|||
, return (d, rd)
|
||||
)
|
||||
case r of
|
||||
NoUnmergedBranches (!(d, rd)) -> do
|
||||
NoUnmergedBranches (!(d, rd), _) -> do
|
||||
let s' = s { allRepoData = Just d, repoData = rd }
|
||||
put s'
|
||||
return s'
|
||||
|
|
|
@ -187,9 +187,9 @@ updateFromLog db@(ImportFeedDbHandle h) (oldtree, currtree)
|
|||
-- When initially populating the database, this
|
||||
-- is faster than diffing from the empty tree
|
||||
-- and looking up every log file.
|
||||
scanbranch = Annex.Branch.overBranchFileContents toscan False goscan >>= \case
|
||||
Annex.Branch.NoUnmergedBranches () -> return ()
|
||||
Annex.Branch.UnmergedBranches () -> scandiff
|
||||
scanbranch = Annex.Branch.overBranchFileContents False toscan goscan >>= \case
|
||||
Annex.Branch.NoUnmergedBranches ((), _) -> return ()
|
||||
Annex.Branch.UnmergedBranches ((), _) -> scandiff
|
||||
|
||||
toscan f
|
||||
| isUrlLog f = Just ()
|
||||
|
|
2
Limit.hs
2
Limit.hs
|
@ -599,7 +599,7 @@ limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
|
|||
M.lookup g (uuidsByGroup gm)
|
||||
maxsizes <- getMaxSizes
|
||||
-- XXX do not calc this every time!
|
||||
sizemap <- calcRepoSizes
|
||||
(sizemap, _sha) <- calcBranchRepoSizes
|
||||
let keysize = fromMaybe 0 (fromKey keySize key)
|
||||
currentlocs <- S.fromList <$> loggedLocations key
|
||||
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
|
||||
|
|
|
@ -45,7 +45,7 @@ import Types.Cluster
|
|||
import Annex.UUID
|
||||
import Annex.CatFile
|
||||
import Annex.VectorClock
|
||||
import Git.Types (RefDate, Ref)
|
||||
import Git.Types (RefDate, Ref, Sha)
|
||||
import qualified Annex
|
||||
|
||||
import Data.Time.Clock
|
||||
|
@ -219,17 +219,23 @@ loggedKeysFor' u = loggedKeys' isthere
|
|||
return there
|
||||
|
||||
{- This is much faster than loggedKeys. -}
|
||||
overLocationLogs :: Bool -> v -> (Key -> [UUID] -> v -> Annex v) -> Annex (Annex.Branch.UnmergedBranches v)
|
||||
overLocationLogs omitnewjournalledfiles v =
|
||||
overLocationLogs' omitnewjournalledfiles v (flip const)
|
||||
overLocationLogs
|
||||
:: Bool
|
||||
-> v
|
||||
-> (Key -> [UUID]
|
||||
-> v
|
||||
-> Annex v)
|
||||
-> Annex (Annex.Branch.UnmergedBranches (v, Sha))
|
||||
overLocationLogs ignorejournal v =
|
||||
overLocationLogs' ignorejournal v (flip const)
|
||||
|
||||
overLocationLogs'
|
||||
:: Bool
|
||||
-> v
|
||||
-> (Annex (Maybe (Key, RawFilePath, Maybe L.ByteString)) -> Annex v -> Annex v)
|
||||
-> (Key -> [UUID] -> v -> Annex v)
|
||||
-> Annex (Annex.Branch.UnmergedBranches v)
|
||||
overLocationLogs' omitnewjournalledfiles iv discarder keyaction = do
|
||||
-> Annex (Annex.Branch.UnmergedBranches (v, Sha))
|
||||
overLocationLogs' ignorejournal iv discarder keyaction = do
|
||||
config <- Annex.getGitConfig
|
||||
clusters <- getClusters
|
||||
|
||||
|
@ -247,7 +253,7 @@ overLocationLogs' omitnewjournalledfiles iv discarder keyaction = do
|
|||
)
|
||||
Nothing -> return v
|
||||
|
||||
Annex.Branch.overBranchFileContents getk omitnewjournalledfiles (go iv)
|
||||
Annex.Branch.overBranchFileContents ignorejournal getk (go iv)
|
||||
|
||||
-- Cannot import Logs.Cluster due to a cycle.
|
||||
-- Annex.clusters gets populated when starting up git-annex.
|
||||
|
|
Loading…
Add table
Reference in a new issue