add annex.alwayscompact
Added annex.alwayscompact setting which can be unset to speed up writes to the git-annex branch in some cases. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
ccff639651
commit
36f0bdcd57
6 changed files with 78 additions and 33 deletions
|
@ -411,13 +411,39 @@ data ChangeOrAppend t = Change t | Append t
|
|||
{- Applies a function that can either modify the content of the file,
|
||||
- or append to the file. Appending can be more efficient when several
|
||||
- lines are written to a file in succession.
|
||||
-
|
||||
- When annex.alwayscompact=false, the function is not passed the content
|
||||
- of the journal file when the journal file already exists, and whatever
|
||||
- value it provides is always appended to the journal file. That avoids
|
||||
- reading the journal file, and so can be faster when many lines are being
|
||||
- written to it. The information that is recorded will be effectively the
|
||||
- same, only obsolate log lines will not get compacted.
|
||||
-}
|
||||
changeOrAppend :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> ChangeOrAppend content) -> Annex ()
|
||||
changeOrAppend ru file f = lockJournal $ \jl -> do
|
||||
changeOrAppend ru file f = lockJournal $ \jl ->
|
||||
checkCanAppendJournalFile jl ru file >>= \case
|
||||
Just appendable -> ifM (annexAlwaysCompact <$> Annex.getGitConfig)
|
||||
( do
|
||||
oldc <- getToChange ru file
|
||||
case f oldc of
|
||||
Change newc -> set jl ru file newc
|
||||
Append toappend -> append jl ru file oldc toappend
|
||||
Append toappend -> append jl file appendable toappend
|
||||
, case f mempty of
|
||||
-- Append even though a change was
|
||||
-- requested; since mempty was passed in,
|
||||
-- the lines requested to change are
|
||||
-- minimized.
|
||||
Change newc -> append jl file appendable newc
|
||||
Append toappend -> append jl file appendable toappend
|
||||
)
|
||||
Nothing -> do
|
||||
oldc <- getToChange ru file
|
||||
case f oldc of
|
||||
Change newc -> set jl ru file newc
|
||||
-- Journal file does not exist yet, so
|
||||
-- cannot append and have to write it all.
|
||||
Append toappend -> set jl ru file $
|
||||
oldc <> journalableByteString toappend
|
||||
|
||||
{- Only get private information when the RegardingUUID is itself private. -}
|
||||
getToChange :: RegardingUUID -> RawFilePath -> Annex L.ByteString
|
||||
|
@ -443,17 +469,11 @@ set jl ru f c = do
|
|||
-- a log file immediately after writing it.
|
||||
invalidateCache
|
||||
|
||||
{- Appends content to the journal file.
|
||||
-
|
||||
- The ByteString is the content that the file had before appending.
|
||||
- It is either the content of the journal file or the content from the
|
||||
- branch. When the journal file does not exist yet, this content is
|
||||
- written to it before appending.
|
||||
-}
|
||||
append :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> L.ByteString -> content -> Annex ()
|
||||
append jl ru f oldc toappend = do
|
||||
{- Appends content to the journal file. -}
|
||||
append :: Journalable content => JournalLocked -> RawFilePath -> AppendableJournalFile -> content -> Annex ()
|
||||
append jl f appendable toappend = do
|
||||
journalChanged
|
||||
appendJournalFile jl ru f oldc toappend
|
||||
appendJournalFile jl appendable toappend
|
||||
fastDebug "Annex.Branch" ("append " ++ fromRawFilePath f)
|
||||
invalidateCache
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
- git-annex branch. Among other things, it ensures that if git-annex is
|
||||
- interrupted, its recorded data is not lost.
|
||||
-
|
||||
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -20,6 +20,7 @@ import Annex.Perms
|
|||
import Annex.Tmp
|
||||
import Annex.LockFile
|
||||
import Utility.Directory.Stream
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
@ -91,27 +92,30 @@ setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
|
|||
-- exists
|
||||
mv `catchIO` (const (createAnnexDirectory jd >> mv))
|
||||
|
||||
{- Appends content to a journal file.
|
||||
-
|
||||
- The oldcontent is whatever is in the git-annex branch.
|
||||
- When the journal file does not yet exist, the oldcontent
|
||||
- is first written to the journal file.
|
||||
-
|
||||
- TODO: Unsafe! Does not append atomically. -}
|
||||
appendJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> L.ByteString -> content -> Annex ()
|
||||
appendJournalFile _jl ru file oldcontent toappend = do
|
||||
newtype AppendableJournalFile = AppendableJournalFile (RawFilePath, RawFilePath)
|
||||
|
||||
{- If the journal file does not exist, it cannot be appended to, because
|
||||
- that would overwrite whatever content the file has in the git-annex
|
||||
- branch. -}
|
||||
checkCanAppendJournalFile :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex (Maybe AppendableJournalFile)
|
||||
checkCanAppendJournalFile _jl ru file = do
|
||||
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
|
||||
( return gitAnnexPrivateJournalDir
|
||||
, return gitAnnexJournalDir
|
||||
)
|
||||
let jfile = fromRawFilePath $ jd P.</> journalFile file
|
||||
let write = liftIO $ ifM (doesFileExist jfile)
|
||||
( withFile jfile AppendMode $ \h ->
|
||||
writeJournalHandle h toappend
|
||||
, withFile jfile WriteMode $ \h -> do
|
||||
writeJournalHandle h oldcontent
|
||||
writeJournalHandle h toappend
|
||||
let jfile = jd P.</> journalFile file
|
||||
ifM (liftIO $ R.doesPathExist jfile)
|
||||
( return (Just (AppendableJournalFile (jd, jfile)))
|
||||
, return Nothing
|
||||
)
|
||||
|
||||
{- Appends content to an existing journal file.
|
||||
-
|
||||
- TODO: Unsafe! Does not append atomically. -}
|
||||
appendJournalFile :: Journalable content => JournalLocked -> AppendableJournalFile -> content -> Annex ()
|
||||
appendJournalFile _jl (AppendableJournalFile (jd, jfile)) content = do
|
||||
let write = liftIO $ withFile (fromRawFilePath jfile) AppendMode $ \h ->
|
||||
writeJournalHandle h content
|
||||
write `catchIO` (const (createAnnexDirectory jd >> write))
|
||||
|
||||
data JournalledContent
|
||||
|
|
|
@ -19,6 +19,8 @@ git-annex (10.20220625) UNRELEASED; urgency=medium
|
|||
importtree=yes or exporttree=yes.
|
||||
* Fix a reversion that prevented --batch commands (and the assistant)
|
||||
from noticing data written to the journal by other commands.
|
||||
* Added annex.alwayscompact setting which can be unset to speed up
|
||||
writes to the git-annex branch in some cases.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 28 Jun 2022 14:49:17 -0400
|
||||
|
||||
|
|
|
@ -57,7 +57,11 @@ getUrlsWithPrefix key prefix = filter (prefix `isPrefixOf`)
|
|||
|
||||
setUrlPresent :: Key -> URLString -> Annex ()
|
||||
setUrlPresent key url = do
|
||||
us <- getUrls key
|
||||
-- Avoid reading the url log when not compacting, for speed.
|
||||
us <- ifM (annexAlwaysCompact <$> Annex.getGitConfig)
|
||||
( getUrls key
|
||||
, pure mempty
|
||||
)
|
||||
unless (url `elem` us) $ do
|
||||
config <- Annex.getGitConfig
|
||||
addLog (Annex.Branch.RegardingUUID []) (urlLogFile config key)
|
||||
|
|
|
@ -82,6 +82,7 @@ data GitConfig = GitConfig
|
|||
, annexBloomAccuracy :: Maybe Int
|
||||
, annexSshCaching :: Maybe Bool
|
||||
, annexAlwaysCommit :: Bool
|
||||
, annexAlwaysCompact :: Bool
|
||||
, annexCommitMessage :: Maybe String
|
||||
, annexMergeAnnexBranches :: Bool
|
||||
, annexDelayAdd :: Maybe Int
|
||||
|
@ -164,6 +165,7 @@ extractGitConfig configsource r = GitConfig
|
|||
, annexBloomAccuracy = getmayberead (annexConfig "bloomaccuracy")
|
||||
, annexSshCaching = getmaybebool (annexConfig "sshcaching")
|
||||
, annexAlwaysCommit = getbool (annexConfig "alwayscommit") True
|
||||
, annexAlwaysCompact = getbool (annexConfig "alwayscompact") True
|
||||
, annexCommitMessage = getmaybe (annexConfig "commitmessage")
|
||||
, annexMergeAnnexBranches = getbool (annexConfig "merge-annex-branches") True
|
||||
, annexDelayAdd = getmayberead (annexConfig "delayadd")
|
||||
|
|
|
@ -1038,6 +1038,19 @@ repository, using [[git-annex-config]]. See its man page for a list.)
|
|||
This works well in combination with annex.alwayscommit=false,
|
||||
to gather up a set of changes and commit them with a message you specify.
|
||||
|
||||
* `annex.alwayscompact`
|
||||
|
||||
By default, git-annex compacts data it records in the git-annex branch.
|
||||
Setting this to false avoids doing that compaction in some cases, which
|
||||
can speed up operations that populate the git-annex branch with a lot
|
||||
of data. However, when used with operations that overwrite old values in
|
||||
the git-annex branch, that may cause the git-annex branch to use more disk
|
||||
space, or slow down reading data from it.
|
||||
|
||||
An example of a command that can be sped up by using
|
||||
`-c annex.alwayscompact=false` is `git-annex registerurl --batch`,
|
||||
when adding a large number of urls to the same key.
|
||||
|
||||
* `annex.allowsign`
|
||||
|
||||
By default git-annex avoids gpg signing commits that it makes when
|
||||
|
|
Loading…
Reference in a new issue