diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 9bf613fe71..ed6f648641 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -38,6 +38,7 @@ module Annex.Branch ( precache, UnmergedBranches(..), overBranchFileContents, + overJournalFileContents, updatedFromTree, ) where @@ -123,7 +124,7 @@ create :: Annex () create = void getBranch {- Returns the sha of the branch, creating it first if necessary. -} -getBranch :: Annex Git.Ref +getBranch :: Annex Git.Sha getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha where go True = do @@ -706,8 +707,8 @@ needUpdateIndex branchref = do return (committedref /= branchref) {- Record that the branch's index has been updated to correspond to a - - given ref of the branch. -} -setIndexSha :: Git.Ref -> Annex () + - given sha of the branch. -} +setIndexSha :: Git.Sha -> Annex () setIndexSha ref = do f <- fromRepo gitAnnexIndexStatus writeLogFile f $ fromRef ref ++ "\n" @@ -994,35 +995,45 @@ data UnmergedBranches t - The action is passed a callback that it can repeatedly call to read - the next file and its contents. When there are no more files, the - callback will return Nothing. + - + - Returns the accumulated result of the callback, as well as the sha of + - the branch at the point it was read. -} overBranchFileContents - :: (RawFilePath -> Maybe v) - -> Bool - -- ^ When there are new files in the journal that have not yet - -- been committed to the branch, should those files be omitted? - -- When this is False, the callback is run on each journalled file - -- at the end, and so may be run more than once on the same file. + :: Bool + -- ^ Should files in the journal be ignored? When False, + -- the content of journalled files is combined with files in the + -- git-annex branch. And also, at the end, the callback is run + -- on each journalled file, in case some journalled files are new + -- files that do not yet appear in the branch. Note that this means + -- the callback can be run more than once on the same filename, + -- and in this case it's also possible for the callback to be + -- passed some of the same file content repeatedly. + -> (RawFilePath -> Maybe v) -> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a) - -> Annex (UnmergedBranches a) -overBranchFileContents select omitnewjournalledfiles go = do + -> Annex (UnmergedBranches (a, Git.Sha)) +overBranchFileContents ignorejournal select go = do st <- update - v <- overBranchFileContents' select omitnewjournalledfiles go st + let st' = if ignorejournal + then st { journalIgnorable = True } + else st + v <- overBranchFileContents' select go st' return $ if not (null (unmergedRefs st)) then UnmergedBranches v else NoUnmergedBranches v overBranchFileContents' :: (RawFilePath -> Maybe v) - -> Bool -> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a) -> BranchState - -> Annex a -overBranchFileContents' select omitnewjournalledfiles go st = do + -> Annex (a, Git.Sha) +overBranchFileContents' select go st = do g <- Annex.gitRepo + branchsha <- getBranch (l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive (Git.LsTree.LsTreeLong False) - fullname + branchsha let select' f = fmap (\v -> (v, f)) (select f) buf <- liftIO newEmptyMVar let go' reader = go $ liftIO reader >>= \case @@ -1030,24 +1041,12 @@ overBranchFileContents' select omitnewjournalledfiles go st = do content' <- checkjournal f content return (Just (v, f, content')) Nothing - | journalIgnorable st || omitnewjournalledfiles -> - return Nothing - -- The journal did not get committed to the - -- branch, and may contain new files that - -- are not present in the branch, which - -- need to be provided to the action still. - -- This can cause the action to be run a - -- second time with a file it already ran on. - | otherwise -> liftIO (tryTakeMVar buf) >>= \case - Nothing -> do - jfs <- journalledFiles - pjfs <- journalledFilesPrivate - drain buf jfs pjfs - Just (jfs, pjfs) -> drain buf jfs pjfs - catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go' + | journalIgnorable st -> return Nothing + | otherwise -> overJournalFileContents' buf (handlestale branchsha) select + res <- catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go' `finally` liftIO (void cleanup) + return (res, branchsha) where - -- Check the journal, in case it did not get committed to the branch checkjournal f branchcontent | journalIgnorable st = return branchcontent | otherwise = getJournalFileStale (GetPrivate True) f >>= return . \case @@ -1056,20 +1055,50 @@ overBranchFileContents' select omitnewjournalledfiles go st = do Just journalledcontent PossiblyStaleJournalledContent journalledcontent -> Just (fromMaybe mempty branchcontent <> journalledcontent) - - drain buf fs pfs = case getnext fs pfs of + + handlestale branchsha f journalledcontent = do + -- This is expensive, but happens only when there is a + -- private journal file. + content <- getRef branchsha f + return (content <> journalledcontent) + +{- Like overBranchFileContents but only reads the content of journalled + - files. Note that when there are private UUIDs, the journal files may + - only include information about the private UUID, while information about + - other UUIDs has been committed to the git-annex branch. + -} +overJournalFileContents + :: (RawFilePath -> Maybe v) + -> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a) + -> Annex a +overJournalFileContents select go = do + buf <- liftIO newEmptyMVar + go $ overJournalFileContents' buf handlestale select + where + handlestale _f journalledcontent = return journalledcontent + +overJournalFileContents' + :: MVar ([RawFilePath], [RawFilePath]) + -> (RawFilePath -> L.ByteString -> Annex L.ByteString) + -> (RawFilePath -> Maybe a) + -> Annex (Maybe (a, RawFilePath, Maybe L.ByteString)) +overJournalFileContents' buf handlestale select = + liftIO (tryTakeMVar buf) >>= \case + Nothing -> do + jfs <- journalledFiles + pjfs <- journalledFilesPrivate + drain jfs pjfs + Just (jfs, pjfs) -> drain jfs pjfs + where + drain fs pfs = case getnext fs pfs of Just (v, f, fs', pfs') -> do liftIO $ putMVar buf (fs', pfs') content <- getJournalFileStale (GetPrivate True) f >>= \case NoJournalledContent -> return Nothing JournalledContent journalledcontent -> return (Just journalledcontent) - PossiblyStaleJournalledContent journalledcontent -> do - -- This is expensive, but happens - -- only when there is a private - -- journal file. - content <- getRef fullname f - return (Just (content <> journalledcontent)) + PossiblyStaleJournalledContent journalledcontent -> + Just <$> handlestale f journalledcontent return (Just (v, f, content)) Nothing -> do liftIO $ putMVar buf ([], []) diff --git a/Annex/Locations.hs b/Annex/Locations.hs index 2605f74651..0b1ad4d556 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -592,7 +592,7 @@ gitAnnexIndex r = gitAnnexDir r P. "index" gitAnnexPrivateIndex :: Git.Repo -> RawFilePath gitAnnexPrivateIndex r = gitAnnexDir r P. "index-private" -{- Holds the ref of the git-annex branch that the index was last updated to. +{- Holds the sha of the git-annex branch that the index was last updated to. - - The .lck in the name is a historical accident; this is not used as a - lock. -} diff --git a/Annex/RepoSize.hs b/Annex/RepoSize.hs index 584785cbb8..936101f02a 100644 --- a/Annex/RepoSize.hs +++ b/Annex/RepoSize.hs @@ -12,26 +12,33 @@ import Annex.Branch (UnmergedBranches(..)) import Types.RepoSize import Logs.Location import Logs.UUID +import Git.Types (Sha) import qualified Data.Map.Strict as M {- Sum up the sizes of all keys in all repositories, from the information - - in the git-annex branch. New keys that only appear in the journal are - - not included. Can be slow. + - in the git-annex branch, but not the journal. Retuns the sha of the + - branch commit that was used. - - The map includes the UUIDs of all known repositories, including - repositories that are empty. -} -calcRepoSizes :: Annex (M.Map UUID RepoSize) -calcRepoSizes = do +calcBranchRepoSizes :: Annex (M.Map UUID RepoSize, Sha) +calcBranchRepoSizes = do knownuuids <- M.keys <$> uuidDescMap let startmap = M.fromList $ map (\u -> (u, RepoSize 0)) knownuuids overLocationLogs True startmap accum >>= \case - UnmergedBranches m -> return m - NoUnmergedBranches m -> return m + UnmergedBranches v -> return v + NoUnmergedBranches v -> return v where addksz ksz (Just (RepoSize sz)) = Just $ RepoSize $ sz + ksz addksz ksz Nothing = Just $ RepoSize ksz accum k locs m = return $ let sz = fromMaybe 0 $ fromKey keySize k in foldl' (flip $ M.alter $ addksz sz) m locs + +{- Given the RepoSizes calculated from the git-annex branch, updates it with + - data from journalled location logs. + -} +journalledRepoSizes :: M.Map UUID RepoSize -> Sha -> Annex (M.Map UUID RepoSize) +journalledRepoSizes m branchsha = undefined --- XXX diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index d9a6b81499..f846695016 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -288,8 +288,8 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do (\reader cont -> checktimelimit (discard reader) cont) (\k _ () -> keyaction Nothing (SeekInput [], k, mkActionItem k)) >>= \case - Annex.Branch.NoUnmergedBranches () -> return () - Annex.Branch.UnmergedBranches () -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)" + Annex.Branch.NoUnmergedBranches ((), _) -> return () + Annex.Branch.UnmergedBranches ((), _) -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)" runkeyaction getks = do keyaction <- mkkeyaction diff --git a/Command/Info.hs b/Command/Info.hs index 2a061efdfb..5906fe3b43 100644 --- a/Command/Info.hs +++ b/Command/Info.hs @@ -652,7 +652,7 @@ cachedAllRepoData = do , return (d, rd) ) case r of - NoUnmergedBranches (!(d, rd)) -> do + NoUnmergedBranches (!(d, rd), _) -> do let s' = s { allRepoData = Just d, repoData = rd } put s' return s' diff --git a/Database/ImportFeed.hs b/Database/ImportFeed.hs index c17eb0ca04..5f35ff3505 100644 --- a/Database/ImportFeed.hs +++ b/Database/ImportFeed.hs @@ -187,9 +187,9 @@ updateFromLog db@(ImportFeedDbHandle h) (oldtree, currtree) -- When initially populating the database, this -- is faster than diffing from the empty tree -- and looking up every log file. - scanbranch = Annex.Branch.overBranchFileContents toscan False goscan >>= \case - Annex.Branch.NoUnmergedBranches () -> return () - Annex.Branch.UnmergedBranches () -> scandiff + scanbranch = Annex.Branch.overBranchFileContents False toscan goscan >>= \case + Annex.Branch.NoUnmergedBranches ((), _) -> return () + Annex.Branch.UnmergedBranches ((), _) -> scandiff toscan f | isUrlLog f = Just () diff --git a/Limit.hs b/Limit.hs index ff988561fe..753aa4469a 100644 --- a/Limit.hs +++ b/Limit.hs @@ -599,7 +599,7 @@ limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles M.lookup g (uuidsByGroup gm) maxsizes <- getMaxSizes -- XXX do not calc this every time! - sizemap <- calcRepoSizes + (sizemap, _sha) <- calcBranchRepoSizes let keysize = fromMaybe 0 (fromKey keySize key) currentlocs <- S.fromList <$> loggedLocations key let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of diff --git a/Logs/Location.hs b/Logs/Location.hs index c07b19cd2a..ddcbb58234 100644 --- a/Logs/Location.hs +++ b/Logs/Location.hs @@ -45,7 +45,7 @@ import Types.Cluster import Annex.UUID import Annex.CatFile import Annex.VectorClock -import Git.Types (RefDate, Ref) +import Git.Types (RefDate, Ref, Sha) import qualified Annex import Data.Time.Clock @@ -219,17 +219,23 @@ loggedKeysFor' u = loggedKeys' isthere return there {- This is much faster than loggedKeys. -} -overLocationLogs :: Bool -> v -> (Key -> [UUID] -> v -> Annex v) -> Annex (Annex.Branch.UnmergedBranches v) -overLocationLogs omitnewjournalledfiles v = - overLocationLogs' omitnewjournalledfiles v (flip const) +overLocationLogs + :: Bool + -> v + -> (Key -> [UUID] + -> v + -> Annex v) + -> Annex (Annex.Branch.UnmergedBranches (v, Sha)) +overLocationLogs ignorejournal v = + overLocationLogs' ignorejournal v (flip const) overLocationLogs' :: Bool -> v -> (Annex (Maybe (Key, RawFilePath, Maybe L.ByteString)) -> Annex v -> Annex v) -> (Key -> [UUID] -> v -> Annex v) - -> Annex (Annex.Branch.UnmergedBranches v) -overLocationLogs' omitnewjournalledfiles iv discarder keyaction = do + -> Annex (Annex.Branch.UnmergedBranches (v, Sha)) +overLocationLogs' ignorejournal iv discarder keyaction = do config <- Annex.getGitConfig clusters <- getClusters @@ -247,7 +253,7 @@ overLocationLogs' omitnewjournalledfiles iv discarder keyaction = do ) Nothing -> return v - Annex.Branch.overBranchFileContents getk omitnewjournalledfiles (go iv) + Annex.Branch.overBranchFileContents ignorejournal getk (go iv) -- Cannot import Logs.Cluster due to a cycle. -- Annex.clusters gets populated when starting up git-annex.