split out appending to journal from writing, high level only
Currently this is not an improvement, but it allows for optimising appendJournalFile later. With an optimised appendJournalFile, this will greatly speed up access patterns like git-annex addurl of a lot of urls to the same key, where the log file can grow rather large. Appending rather than re-writing the journal file for each line can save a lot of disk writes. It still has to read the current journal or branch file, to check if it can append to it, and so when the journal file does not exist yet, it can write the old content from the branch to it. Probably the re-reads are better cached by the filesystem than repeated writes. (If the re-reads turn out to keep performance bad, they could be eliminated, at the cost of not being able to compact the log when replacing old information in it. That could be enabled by a switch.) While the immediate need is to affect addurl writes, it was implemented at the level of presence logs, so will also perhaps speed up location logs. The only added overhead is the call to isNewInfo, which only needs to compare ByteStrings. Helping to balance that out, it avoids compactLog when it's able to append. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
2ce1eaf56a
commit
ce455223df
4 changed files with 61 additions and 6 deletions
|
@ -1,6 +1,6 @@
|
|||
{- management of the git-annex branch
|
||||
-
|
||||
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -23,6 +23,8 @@ module Annex.Branch (
|
|||
getUnmergedRefs,
|
||||
RegardingUUID(..),
|
||||
change,
|
||||
ChangeOrAppend(..),
|
||||
changeOrAppend,
|
||||
maybeChange,
|
||||
commitMessage,
|
||||
createMessage,
|
||||
|
@ -48,7 +50,7 @@ import Control.Concurrent (threadDelay)
|
|||
import Control.Concurrent.MVar
|
||||
import qualified System.FilePath.ByteString as P
|
||||
|
||||
import Annex.Common
|
||||
import Annex.Common hiding (append)
|
||||
import Types.BranchState
|
||||
import Annex.BranchState
|
||||
import Annex.Journal
|
||||
|
@ -405,6 +407,19 @@ maybeChange ru file f = lockJournal $ \jl -> do
|
|||
in when (v /= b) $ set jl ru file b
|
||||
_ -> noop
|
||||
|
||||
data ChangeOrAppend t = Change t | Append t
|
||||
|
||||
{- Applies a function that can either modify the content of the file,
|
||||
- or append to the file. Appending can be more efficient when several
|
||||
- lines are written to a file in succession.
|
||||
-}
|
||||
changeOrAppend :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> ChangeOrAppend content) -> Annex ()
|
||||
changeOrAppend ru file f = lockJournal $ \jl -> do
|
||||
oldc <- getToChange jl ru file
|
||||
case f oldc of
|
||||
Change newc -> set jl ru file newc
|
||||
Append toappend -> append jl ru file oldc toappend
|
||||
|
||||
{- Only get private information when the RegardingUUID is itself private. -}
|
||||
getToChange :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex L.ByteString
|
||||
getToChange jl ru f = flip (getLocal' jl) f . GetPrivate =<< regardingPrivateUUID ru
|
||||
|
@ -429,6 +444,20 @@ set jl ru f c = do
|
|||
-- a log file immediately after writing it.
|
||||
invalidateCache
|
||||
|
||||
{- Appends content to the journal file.
|
||||
-
|
||||
- The ByteString is the content that the file had before appending.
|
||||
- It is either the content of the journal file or the content from the
|
||||
- branch. When the journal file does not exist yet, this content is
|
||||
- written to it before appending.
|
||||
-}
|
||||
append :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> L.ByteString -> content -> Annex ()
|
||||
append jl ru f oldc toappend = do
|
||||
journalChanged
|
||||
appendJournalFile jl ru f oldc toappend
|
||||
fastDebug "Annex.Branch" ("append " ++ fromRawFilePath f)
|
||||
invalidateCache
|
||||
|
||||
{- Commit message used when making a commit of whatever data has changed
|
||||
- to the git-annex brach. -}
|
||||
commitMessage :: Annex String
|
||||
|
|
|
@ -92,6 +92,23 @@ setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
|
|||
-- exists
|
||||
write `catchIO` (const (createAnnexDirectory jd >> write))
|
||||
|
||||
{- Appends content to a journal file.
|
||||
-
|
||||
- TODO: Inefficient! -}
|
||||
appendJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> L.ByteString -> content -> Annex ()
|
||||
appendJournalFile _jl ru file oldcontent toappend = withOtherTmp $ \tmp -> do
|
||||
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
|
||||
( return gitAnnexPrivateJournalDir
|
||||
, return gitAnnexJournalDir
|
||||
)
|
||||
let jfile = journalFile file
|
||||
let tmpfile = tmp P.</> jfile
|
||||
let write = liftIO $ do
|
||||
withFile (fromRawFilePath tmpfile) WriteMode $ \h -> do
|
||||
writeJournalHandle h oldcontent
|
||||
writeJournalHandle h toappend
|
||||
write `catchIO` (const (createAnnexDirectory jd >> write))
|
||||
|
||||
data JournalledContent
|
||||
= NoJournalledContent
|
||||
| JournalledContent L.ByteString
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
- A line of the log will look like: "date N INFO"
|
||||
- Where N=1 when the INFO is present, 0 otherwise.
|
||||
-
|
||||
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -35,10 +35,13 @@ addLog ru file logstatus loginfo =
|
|||
|
||||
addLog' :: Annex.Branch.RegardingUUID -> RawFilePath -> LogStatus -> LogInfo -> CandidateVectorClock -> Annex ()
|
||||
addLog' ru file logstatus loginfo c =
|
||||
Annex.Branch.change ru file $ \b ->
|
||||
Annex.Branch.changeOrAppend ru file $ \b ->
|
||||
let old = parseLog b
|
||||
line = genLine logstatus loginfo c old
|
||||
in buildLog $ compactLog (line : old)
|
||||
in if isNewInfo line old
|
||||
then Annex.Branch.Append $ buildLog [line]
|
||||
else Annex.Branch.Change $ buildLog $
|
||||
compactLog (line : old)
|
||||
|
||||
{- When a LogLine already exists with the same status and info, but an
|
||||
- older timestamp, that LogLine is preserved, rather than updating the log
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex presence log, pure operations
|
||||
-
|
||||
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -91,6 +91,12 @@ mapLog = M.elems
|
|||
logMap :: [LogLine] -> LogMap
|
||||
logMap = foldr insertNewerLogLine M.empty
|
||||
|
||||
{- Check if the info of the given line is not in the list of LogLines. -}
|
||||
isNewInfo :: LogLine -> [LogLine] -> Bool
|
||||
isNewInfo l old = not (any issame old)
|
||||
where
|
||||
issame l' = info l' == info l
|
||||
|
||||
insertBetter :: (LogLine -> Bool) -> LogLine -> LogMap -> Maybe LogMap
|
||||
insertBetter betterthan l m
|
||||
| better = Just (M.insert i l m)
|
||||
|
|
Loading…
Add table
Reference in a new issue