Sped up query commands that read the git-annex branch by around 5%
The only price paid is one additional MVar read per write to the journal. Presumably writing a journal file dominiates over a MVar read time by several orders of magnitude. --batch does not get the speedup because then it needs to notice when another process has made a change. Also made the assistant and other damon modes bypass the optimisation, which would not help them anyway.
This commit is contained in:
parent
aba905152a
commit
aeca7c2207
14 changed files with 101 additions and 25 deletions
|
@ -42,6 +42,7 @@ import Data.ByteString.Builder
|
||||||
import Control.Concurrent (threadDelay)
|
import Control.Concurrent (threadDelay)
|
||||||
|
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
|
import Types.BranchState
|
||||||
import Annex.BranchState
|
import Annex.BranchState
|
||||||
import Annex.Journal
|
import Annex.Journal
|
||||||
import Annex.GitOverlay
|
import Annex.GitOverlay
|
||||||
|
@ -123,7 +124,7 @@ getBranch = maybe (hasOrigin >>= go >>= use) return =<< branchsha
|
||||||
|
|
||||||
{- Ensures that the branch and index are up-to-date; should be
|
{- Ensures that the branch and index are up-to-date; should be
|
||||||
- called before data is read from it. Runs only once per git-annex run. -}
|
- called before data is read from it. Runs only once per git-annex run. -}
|
||||||
update :: Annex ()
|
update :: Annex BranchState
|
||||||
update = runUpdateOnce $ void $ updateTo =<< siblingBranches
|
update = runUpdateOnce $ void $ updateTo =<< siblingBranches
|
||||||
|
|
||||||
{- Forces an update even if one has already been run. -}
|
{- Forces an update even if one has already been run. -}
|
||||||
|
@ -221,8 +222,10 @@ updateTo' pairs = do
|
||||||
- Returns an empty string if the file doesn't exist yet. -}
|
- Returns an empty string if the file doesn't exist yet. -}
|
||||||
get :: RawFilePath -> Annex L.ByteString
|
get :: RawFilePath -> Annex L.ByteString
|
||||||
get file = do
|
get file = do
|
||||||
update
|
st <- update
|
||||||
getLocal file
|
if journalIgnorable st
|
||||||
|
then getRef fullname file
|
||||||
|
else getLocal file
|
||||||
|
|
||||||
{- Like get, but does not merge the branch, so the info returned may not
|
{- Like get, but does not merge the branch, so the info returned may not
|
||||||
- reflect changes in remotes.
|
- reflect changes in remotes.
|
||||||
|
@ -274,7 +277,9 @@ maybeChange file f = lockJournal $ \jl -> do
|
||||||
|
|
||||||
{- Records new content of a file into the journal -}
|
{- Records new content of a file into the journal -}
|
||||||
set :: Journalable content => JournalLocked -> RawFilePath -> content -> Annex ()
|
set :: Journalable content => JournalLocked -> RawFilePath -> content -> Annex ()
|
||||||
set = setJournalFile
|
set jl f c = do
|
||||||
|
journalChanged
|
||||||
|
setJournalFile jl f c
|
||||||
|
|
||||||
{- Commit message used when making a commit of whatever data has changed
|
{- Commit message used when making a commit of whatever data has changed
|
||||||
- to the git-annex brach. -}
|
- to the git-annex brach. -}
|
||||||
|
@ -359,7 +364,7 @@ commitIndex' jl branchref message basemessage retrynum parents = do
|
||||||
- that have not been committed yet. There may be duplicates in the list. -}
|
- that have not been committed yet. There may be duplicates in the list. -}
|
||||||
files :: Annex [RawFilePath]
|
files :: Annex [RawFilePath]
|
||||||
files = do
|
files = do
|
||||||
update
|
_ <- update
|
||||||
-- ++ forces the content of the first list to be buffered in memory,
|
-- ++ forces the content of the first list to be buffered in memory,
|
||||||
-- so use getJournalledFilesStale which should be much smaller most
|
-- so use getJournalledFilesStale which should be much smaller most
|
||||||
-- of the time. branchFiles will stream as the list is consumed.
|
-- of the time. branchFiles will stream as the list is consumed.
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
-
|
-
|
||||||
- Runtime state about the git-annex branch.
|
- Runtime state about the git-annex branch.
|
||||||
-
|
-
|
||||||
- Copyright 2011-2012 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -29,13 +29,56 @@ checkIndexOnce a = unlessM (indexChecked <$> getState) $ do
|
||||||
|
|
||||||
{- Runs an action to update the branch, if it's not been updated before
|
{- Runs an action to update the branch, if it's not been updated before
|
||||||
- in this run of git-annex. -}
|
- in this run of git-annex. -}
|
||||||
runUpdateOnce :: Annex () -> Annex ()
|
runUpdateOnce :: Annex () -> Annex BranchState
|
||||||
runUpdateOnce a = unlessM (branchUpdated <$> getState) $ do
|
runUpdateOnce a = do
|
||||||
a
|
st <- getState
|
||||||
disableUpdate
|
if branchUpdated st
|
||||||
|
then return st
|
||||||
|
else do
|
||||||
|
a
|
||||||
|
let stf = \st' -> st'
|
||||||
|
{ branchUpdated = True
|
||||||
|
-- The update staged anything that was
|
||||||
|
-- journalled before, so the journal
|
||||||
|
-- does not need to be checked going
|
||||||
|
-- forward, unless new information
|
||||||
|
-- gets written to it, or unless
|
||||||
|
-- this run of git-annex needs to notice
|
||||||
|
-- changes journalled by other processes
|
||||||
|
-- while it's running.
|
||||||
|
, journalIgnorable = not $
|
||||||
|
journalNeverIgnorable st'
|
||||||
|
}
|
||||||
|
changeState stf
|
||||||
|
return (stf st)
|
||||||
|
|
||||||
{- Avoids updating the branch. A useful optimisation when the branch
|
{- Avoids updating the branch. A useful optimisation when the branch
|
||||||
- is known to have not changed, or git-annex won't be relying on info
|
- is known to have not changed, or git-annex won't be relying on info
|
||||||
- from it. -}
|
- from it. -}
|
||||||
disableUpdate :: Annex ()
|
disableUpdate :: Annex ()
|
||||||
disableUpdate = changeState $ \s -> s { branchUpdated = True }
|
disableUpdate = changeState $ \s -> s { branchUpdated = True }
|
||||||
|
|
||||||
|
{- Called when a change is made to the journal. -}
|
||||||
|
journalChanged :: Annex ()
|
||||||
|
journalChanged = do
|
||||||
|
-- Optimisation: Typically journalIgnorable will already be True
|
||||||
|
-- (when one thing gets journalled, often other things do to),
|
||||||
|
-- so avoid an unnecessary write to the MVar that changeState
|
||||||
|
-- would do.
|
||||||
|
--
|
||||||
|
-- This assumes that another thread is not changing journalIgnorable
|
||||||
|
-- at the same time, but since runUpdateOnce is the only
|
||||||
|
-- thing that changes it, and it only runs once, that
|
||||||
|
-- should not happen.
|
||||||
|
st <- getState
|
||||||
|
when (journalIgnorable st) $
|
||||||
|
changeState $ \st' -> st' { journalIgnorable = False }
|
||||||
|
|
||||||
|
{- When git-annex is somehow interactive, eg in --batch mode,
|
||||||
|
- and needs to always notice changes made to the journal by other
|
||||||
|
- processes, this disables optimisations that avoid normally reading the
|
||||||
|
- journal.
|
||||||
|
-}
|
||||||
|
enableInteractiveJournalAccess :: Annex ()
|
||||||
|
enableInteractiveJournalAccess = changeState $
|
||||||
|
\s -> s { journalNeverIgnorable = True }
|
||||||
|
|
|
@ -50,6 +50,7 @@ import Utility.ThreadScheduler
|
||||||
import Utility.HumanTime
|
import Utility.HumanTime
|
||||||
import qualified BuildInfo
|
import qualified BuildInfo
|
||||||
import Annex.Perms
|
import Annex.Perms
|
||||||
|
import Annex.BranchState
|
||||||
import Utility.LogFile
|
import Utility.LogFile
|
||||||
#ifdef mingw32_HOST_OS
|
#ifdef mingw32_HOST_OS
|
||||||
import Utility.Env
|
import Utility.Env
|
||||||
|
@ -70,8 +71,8 @@ stopDaemon = liftIO . Utility.Daemon.stopDaemon =<< fromRepo gitAnnexPidFile
|
||||||
- stdout and stderr descriptors. -}
|
- stdout and stderr descriptors. -}
|
||||||
startDaemon :: Bool -> Bool -> Maybe Duration -> Maybe String -> Maybe HostName -> Maybe (Maybe Handle -> Maybe Handle -> String -> FilePath -> IO ()) -> Annex ()
|
startDaemon :: Bool -> Bool -> Maybe Duration -> Maybe String -> Maybe HostName -> Maybe (Maybe Handle -> Maybe Handle -> String -> FilePath -> IO ()) -> Annex ()
|
||||||
startDaemon assistant foreground startdelay cannotrun listenhost startbrowser = do
|
startDaemon assistant foreground startdelay cannotrun listenhost startbrowser = do
|
||||||
|
|
||||||
Annex.changeState $ \s -> s { Annex.daemon = True }
|
Annex.changeState $ \s -> s { Annex.daemon = True }
|
||||||
|
enableInteractiveJournalAccess
|
||||||
pidfile <- fromRepo gitAnnexPidFile
|
pidfile <- fromRepo gitAnnexPidFile
|
||||||
logfile <- fromRepo gitAnnexLogFile
|
logfile <- fromRepo gitAnnexLogFile
|
||||||
liftIO $ debugM desc $ "logging to " ++ logfile
|
liftIO $ debugM desc $ "logging to " ++ logfile
|
||||||
|
|
|
@ -28,7 +28,7 @@ mkGenerator cmds userinput = do
|
||||||
-- so skewing the runtime of the first action that will be
|
-- so skewing the runtime of the first action that will be
|
||||||
-- benchmarked.
|
-- benchmarked.
|
||||||
Annex.Branch.commit "benchmarking"
|
Annex.Branch.commit "benchmarking"
|
||||||
Annex.Branch.update
|
_ <- Annex.Branch.update
|
||||||
l <- mapM parsesubcommand $ split [";"] userinput
|
l <- mapM parsesubcommand $ split [";"] userinput
|
||||||
return $ do
|
return $ do
|
||||||
forM_ l $ \(cmd, seek, st) ->
|
forM_ l $ \(cmd, seek, st) ->
|
||||||
|
|
|
@ -3,6 +3,7 @@ git-annex (8.20200331) UNRELEASED; urgency=medium
|
||||||
* Improve git-annex's ability to find the path to its program,
|
* Improve git-annex's ability to find the path to its program,
|
||||||
especially when it needs to run itself in another repo to upgrade it.
|
especially when it needs to run itself in another repo to upgrade it.
|
||||||
* adb: Better messages when the adb command is not installed.
|
* adb: Better messages when the adb command is not installed.
|
||||||
|
* Sped up query commands that read the git-annex branch by around 5%.
|
||||||
* Various speed improvements gained by using ByteStrings for git refs and
|
* Various speed improvements gained by using ByteStrings for git refs and
|
||||||
shas.
|
shas.
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@ import CmdLine.GitAnnex.Options
|
||||||
import Options.Applicative
|
import Options.Applicative
|
||||||
import Limit
|
import Limit
|
||||||
import Types.FileMatcher
|
import Types.FileMatcher
|
||||||
|
import Annex.BranchState
|
||||||
|
|
||||||
data BatchMode = Batch BatchFormat | NoBatch
|
data BatchMode = Batch BatchFormat | NoBatch
|
||||||
|
|
||||||
|
@ -72,7 +73,9 @@ batchInput fmt parser a = go =<< batchLines fmt
|
||||||
parseerr s = giveup $ "Batch input parse failure: " ++ s
|
parseerr s = giveup $ "Batch input parse failure: " ++ s
|
||||||
|
|
||||||
batchLines :: BatchFormat -> Annex [String]
|
batchLines :: BatchFormat -> Annex [String]
|
||||||
batchLines fmt = liftIO $ splitter <$> getContents
|
batchLines fmt = do
|
||||||
|
enableInteractiveJournalAccess
|
||||||
|
liftIO $ splitter <$> getContents
|
||||||
where
|
where
|
||||||
splitter = case fmt of
|
splitter = case fmt of
|
||||||
BatchLine -> lines
|
BatchLine -> lines
|
||||||
|
|
|
@ -45,7 +45,7 @@ perform :: Transitions -> Bool -> CommandPerform
|
||||||
perform ts True = do
|
perform ts True = do
|
||||||
recordTransitions Branch.change ts
|
recordTransitions Branch.change ts
|
||||||
-- get branch committed before contining with the transition
|
-- get branch committed before contining with the transition
|
||||||
Branch.update
|
_ <- Branch.update
|
||||||
void $ Branch.performTransitions ts True []
|
void $ Branch.performTransitions ts True []
|
||||||
next $ return True
|
next $ return True
|
||||||
perform _ False = do
|
perform _ False = do
|
||||||
|
|
|
@ -30,7 +30,7 @@ seek bs = do
|
||||||
|
|
||||||
mergeAnnexBranch :: CommandStart
|
mergeAnnexBranch :: CommandStart
|
||||||
mergeAnnexBranch = starting "merge" (ActionItemOther (Just "git-annex")) $ do
|
mergeAnnexBranch = starting "merge" (ActionItemOther (Just "git-annex")) $ do
|
||||||
Annex.Branch.update
|
_ <- Annex.Branch.update
|
||||||
-- commit explicitly, in case no remote branches were merged
|
-- commit explicitly, in case no remote branches were merged
|
||||||
Annex.Branch.commit =<< Annex.Branch.commitMessage
|
Annex.Branch.commit =<< Annex.Branch.commitMessage
|
||||||
next $ return True
|
next $ return True
|
||||||
|
|
|
@ -17,6 +17,7 @@ import qualified Remote
|
||||||
import Utility.SimpleProtocol (dupIoHandles)
|
import Utility.SimpleProtocol (dupIoHandles)
|
||||||
import Git.Types (RemoteName)
|
import Git.Types (RemoteName)
|
||||||
import qualified Database.Keys
|
import qualified Database.Keys
|
||||||
|
import Annex.BranchState
|
||||||
|
|
||||||
data TransferRequest = TransferRequest Direction Remote Key AssociatedFile
|
data TransferRequest = TransferRequest Direction Remote Key AssociatedFile
|
||||||
|
|
||||||
|
@ -29,6 +30,7 @@ seek = withNothing (commandAction start)
|
||||||
|
|
||||||
start :: CommandStart
|
start :: CommandStart
|
||||||
start = do
|
start = do
|
||||||
|
enableInteractiveJournalAccess
|
||||||
(readh, writeh) <- liftIO dupIoHandles
|
(readh, writeh) <- liftIO dupIoHandles
|
||||||
runRequests readh writeh runner
|
runRequests readh writeh runner
|
||||||
stop
|
stop
|
||||||
|
|
|
@ -90,7 +90,7 @@ knownUrls = do
|
||||||
{- Ensure the git-annex branch's index file is up-to-date and
|
{- Ensure the git-annex branch's index file is up-to-date and
|
||||||
- any journaled changes are reflected in it, since we're going
|
- any journaled changes are reflected in it, since we're going
|
||||||
- to query its index directly. -}
|
- to query its index directly. -}
|
||||||
Annex.Branch.update
|
_ <- Annex.Branch.update
|
||||||
Annex.Branch.commit =<< Annex.Branch.commitMessage
|
Annex.Branch.commit =<< Annex.Branch.commitMessage
|
||||||
Annex.Branch.withIndex $ do
|
Annex.Branch.withIndex $ do
|
||||||
top <- fromRepo Git.repoPath
|
top <- fromRepo Git.repoPath
|
||||||
|
|
|
@ -22,6 +22,7 @@ import Utility.SimpleProtocol
|
||||||
import Utility.ThreadScheduler
|
import Utility.ThreadScheduler
|
||||||
import Config
|
import Config
|
||||||
import Annex.Ssh
|
import Annex.Ssh
|
||||||
|
import Annex.BranchState
|
||||||
import Types.Messages
|
import Types.Messages
|
||||||
|
|
||||||
import Control.Concurrent
|
import Control.Concurrent
|
||||||
|
@ -163,7 +164,9 @@ genTransportHandle = do
|
||||||
annexstate <- newMVar =<< Annex.new =<< Git.CurrentRepo.get
|
annexstate <- newMVar =<< Annex.new =<< Git.CurrentRepo.get
|
||||||
g <- Annex.repo <$> readMVar annexstate
|
g <- Annex.repo <$> readMVar annexstate
|
||||||
let h = TransportHandle (LocalRepo g) annexstate
|
let h = TransportHandle (LocalRepo g) annexstate
|
||||||
liftAnnex h $ Annex.setOutput QuietOutput
|
liftAnnex h $ do
|
||||||
|
Annex.setOutput QuietOutput
|
||||||
|
enableInteractiveJournalAccess
|
||||||
return h
|
return h
|
||||||
|
|
||||||
updateTransportHandle :: TransportHandle -> IO TransportHandle
|
updateTransportHandle :: TransportHandle -> IO TransportHandle
|
||||||
|
|
|
@ -8,9 +8,17 @@
|
||||||
module Types.BranchState where
|
module Types.BranchState where
|
||||||
|
|
||||||
data BranchState = BranchState
|
data BranchState = BranchState
|
||||||
{ branchUpdated :: Bool -- has the branch been updated this run?
|
{ branchUpdated :: Bool
|
||||||
, indexChecked :: Bool -- has the index file been checked to exist?
|
-- ^ has the branch been updated this run?
|
||||||
|
, indexChecked :: Bool
|
||||||
|
-- ^ has the index file been checked to exist?
|
||||||
|
, journalIgnorable :: Bool
|
||||||
|
-- ^ can reading the journal be skipped, while still getting
|
||||||
|
-- sufficiently up-to-date information from the branch?
|
||||||
|
, journalNeverIgnorable :: Bool
|
||||||
|
-- ^ should the journal always be read even if it would normally
|
||||||
|
-- be safe to skip it?
|
||||||
}
|
}
|
||||||
|
|
||||||
startBranchState :: BranchState
|
startBranchState :: BranchState
|
||||||
startBranchState = BranchState False False
|
startBranchState = BranchState False False False False
|
||||||
|
|
|
@ -102,12 +102,12 @@ push = do
|
||||||
-- will immediately work. Not pushed here,
|
-- will immediately work. Not pushed here,
|
||||||
-- because it's less obnoxious to let the user
|
-- because it's less obnoxious to let the user
|
||||||
-- push.
|
-- push.
|
||||||
Annex.Branch.update
|
void Annex.Branch.update
|
||||||
(True, False) -> do
|
(True, False) -> do
|
||||||
-- push git-annex to origin, so that
|
-- push git-annex to origin, so that
|
||||||
-- "git push" will from then on
|
-- "git push" will from then on
|
||||||
-- automatically push it
|
-- automatically push it
|
||||||
Annex.Branch.update -- just in case
|
void Annex.Branch.update -- just in case
|
||||||
showAction "pushing new git-annex branch to origin"
|
showAction "pushing new git-annex branch to origin"
|
||||||
showOutput
|
showOutput
|
||||||
inRepo $ Git.Command.run
|
inRepo $ Git.Command.run
|
||||||
|
@ -118,7 +118,7 @@ push = do
|
||||||
_ -> do
|
_ -> do
|
||||||
-- no origin exists, so just let the user
|
-- no origin exists, so just let the user
|
||||||
-- know about the new branch
|
-- know about the new branch
|
||||||
Annex.Branch.update
|
void Annex.Branch.update
|
||||||
showLongNote $
|
showLongNote $
|
||||||
"git-annex branch created\n" ++
|
"git-annex branch created\n" ++
|
||||||
"Be sure to push this branch when pushing to remotes.\n"
|
"Be sure to push this branch when pushing to remotes.\n"
|
||||||
|
|
|
@ -11,13 +11,23 @@ What if, once at startup, it checked if the journal was entirely empty.
|
||||||
If so, it can remember that, and avoid reading journal files.
|
If so, it can remember that, and avoid reading journal files.
|
||||||
Perhaps paired with staging the journal if it's not empty.
|
Perhaps paired with staging the journal if it's not empty.
|
||||||
|
|
||||||
|
When a process writes to the journal, it will need to update its state
|
||||||
|
to remember it's no longer empty.
|
||||||
|
|
||||||
This could lead to behavior changes in some cases where one command is
|
This could lead to behavior changes in some cases where one command is
|
||||||
writing changes and another command used to read them from the journal and
|
writing changes and another command used to read them from the journal and
|
||||||
may no longer do so. But any such behavior change is of a behavior that
|
may no longer do so. But any such behavior change is of a behavior that
|
||||||
used to involve a race; the reader could just as well be ahead of the
|
used to involve a race; the reader could just as well be ahead of the
|
||||||
writer and it would have already behaved as it would after the change.
|
writer and it would have already behaved as it would after the change.
|
||||||
|
|
||||||
But: When a process writes to the journal, it will need to update its state
|
> Hmm, not so fast. If the user has two --batch processes, one that makes
|
||||||
to remember it's no longer empty. --[[Joey]]
|
> changes and the other that queries, they will expect the querying process
|
||||||
|
> to see the changes after they were made. There's no race, the user can
|
||||||
|
> control which process runs by feeding batch inputs to them.
|
||||||
|
>
|
||||||
|
> So, --batch and the assistant, as well as batch-like things that don't
|
||||||
|
> use --batch will need to disable this optimisation it seems. --[[Joey]]
|
||||||
|
|
||||||
[[!tag confirmed]]
|
[[!tag confirmed]]
|
||||||
|
|
||||||
|
>> [[done]] speedup was around 5% --[[Joey]]
|
||||||
|
|
Loading…
Reference in a new issue