From 58c7b0a56d5382cefb5c550f165dc55eaa730e2a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 16 Dec 2013 15:43:28 -0400 Subject: [PATCH] assistant: Always batch changes found in startup scan. Batch detection is heuristic, so can sometimes fail. I observed one such failure while starting up in a repository with 87000 files. After the first several batches of ~5000 files, it fell out of batch mode, and never re-entered it, and so made many more commits of a few files at a time than necessary. So, let's always use batch mode when in the startup scan. This avoids the heuristic there, at least. There is clearly also room to improve the heuristic. Possibly 10 files is too high a bar to be found during a commit, on a system that can commit quickly. --- Assistant/Threads/Committer.hs | 10 ++++++---- Assistant/Threads/Watcher.hs | 5 +++++ debian/changelog | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index f736530e47..bb9b0e47eb 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -52,7 +52,7 @@ commitThread = namedThread "Committer" $ do =<< annexDelayAdd <$> Annex.getGitConfig waitChangeTime $ \(changes, time) -> do readychanges <- handleAdds havelsof delayadd changes - if shouldCommit time (length readychanges) readychanges + if shouldCommit False time (length readychanges) readychanges then do debug [ "committing" @@ -94,7 +94,8 @@ waitChangeTime a = waitchanges 0 let len = length changes -- See if now's a good time to commit. now <- liftIO getCurrentTime - case (lastcommitsize >= maxCommitSize, shouldCommit now len changes, possiblyrename changes) of + scanning <- not . scanComplete <$> getDaemonStatus + case (lastcommitsize >= maxCommitSize, shouldCommit scanning now len changes, possiblyrename changes) of (True, True, _) | len > maxCommitSize -> waitchanges =<< a (changes, now) @@ -199,8 +200,9 @@ maxCommitSize = 5000 - Current strategy: If there have been 10 changes within the past second, - a batch activity is taking place, so wait for later. -} -shouldCommit :: UTCTime -> Int -> [Change] -> Bool -shouldCommit now len changes +shouldCommit :: Bool -> UTCTime -> Int -> [Change] -> Bool +shouldCommit scanning now len changes + | scanning = len >= maxCommitSize | len == 0 = False | len >= maxCommitSize = True | length recentchanges < 10 = True diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index d9afb9adfd..50a0efdd54 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -144,6 +144,11 @@ startupScan scanner = do modifyDaemonStatus_ $ \s -> s { scanComplete = True } + -- Ensure that the Committer sees any changes + -- that it did not process, and acts on them now that + -- the scan is complete. + refillChanges =<< getAnyChanges + return (True, r) {- Hardcoded ignores, passed to the DirWatcher so it can avoid looking diff --git a/debian/changelog b/debian/changelog index 8b8968c4da..6283076717 100644 --- a/debian/changelog +++ b/debian/changelog @@ -10,6 +10,7 @@ git-annex (5.20131214) UNRELEASED; urgency=low * Include man pages in Linux and OSX standalone builds. * Linux standalone build now includes its own glibc and forces the linker to use it, to remove dependence on the host glibc. + * assistant: Always batch changes found in startup scan. -- Joey Hess Sun, 15 Dec 2013 13:32:49 -0400