add annex.alwayscompact
Added annex.alwayscompact setting which can be unset to speed up writes to the git-annex branch in some cases. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
ccff639651
commit
36f0bdcd57
6 changed files with 78 additions and 33 deletions
|
@ -411,13 +411,39 @@ data ChangeOrAppend t = Change t | Append t
|
||||||
{- Applies a function that can either modify the content of the file,
|
{- Applies a function that can either modify the content of the file,
|
||||||
- or append to the file. Appending can be more efficient when several
|
- or append to the file. Appending can be more efficient when several
|
||||||
- lines are written to a file in succession.
|
- lines are written to a file in succession.
|
||||||
|
-
|
||||||
|
- When annex.alwayscompact=false, the function is not passed the content
|
||||||
|
- of the journal file when the journal file already exists, and whatever
|
||||||
|
- value it provides is always appended to the journal file. That avoids
|
||||||
|
- reading the journal file, and so can be faster when many lines are being
|
||||||
|
- written to it. The information that is recorded will be effectively the
|
||||||
|
- same, only obsolate log lines will not get compacted.
|
||||||
-}
|
-}
|
||||||
changeOrAppend :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> ChangeOrAppend content) -> Annex ()
|
changeOrAppend :: Journalable content => RegardingUUID -> RawFilePath -> (L.ByteString -> ChangeOrAppend content) -> Annex ()
|
||||||
changeOrAppend ru file f = lockJournal $ \jl -> do
|
changeOrAppend ru file f = lockJournal $ \jl ->
|
||||||
oldc <- getToChange ru file
|
checkCanAppendJournalFile jl ru file >>= \case
|
||||||
case f oldc of
|
Just appendable -> ifM (annexAlwaysCompact <$> Annex.getGitConfig)
|
||||||
Change newc -> set jl ru file newc
|
( do
|
||||||
Append toappend -> append jl ru file oldc toappend
|
oldc <- getToChange ru file
|
||||||
|
case f oldc of
|
||||||
|
Change newc -> set jl ru file newc
|
||||||
|
Append toappend -> append jl file appendable toappend
|
||||||
|
, case f mempty of
|
||||||
|
-- Append even though a change was
|
||||||
|
-- requested; since mempty was passed in,
|
||||||
|
-- the lines requested to change are
|
||||||
|
-- minimized.
|
||||||
|
Change newc -> append jl file appendable newc
|
||||||
|
Append toappend -> append jl file appendable toappend
|
||||||
|
)
|
||||||
|
Nothing -> do
|
||||||
|
oldc <- getToChange ru file
|
||||||
|
case f oldc of
|
||||||
|
Change newc -> set jl ru file newc
|
||||||
|
-- Journal file does not exist yet, so
|
||||||
|
-- cannot append and have to write it all.
|
||||||
|
Append toappend -> set jl ru file $
|
||||||
|
oldc <> journalableByteString toappend
|
||||||
|
|
||||||
{- Only get private information when the RegardingUUID is itself private. -}
|
{- Only get private information when the RegardingUUID is itself private. -}
|
||||||
getToChange :: RegardingUUID -> RawFilePath -> Annex L.ByteString
|
getToChange :: RegardingUUID -> RawFilePath -> Annex L.ByteString
|
||||||
|
@ -443,17 +469,11 @@ set jl ru f c = do
|
||||||
-- a log file immediately after writing it.
|
-- a log file immediately after writing it.
|
||||||
invalidateCache
|
invalidateCache
|
||||||
|
|
||||||
{- Appends content to the journal file.
|
{- Appends content to the journal file. -}
|
||||||
-
|
append :: Journalable content => JournalLocked -> RawFilePath -> AppendableJournalFile -> content -> Annex ()
|
||||||
- The ByteString is the content that the file had before appending.
|
append jl f appendable toappend = do
|
||||||
- It is either the content of the journal file or the content from the
|
|
||||||
- branch. When the journal file does not exist yet, this content is
|
|
||||||
- written to it before appending.
|
|
||||||
-}
|
|
||||||
append :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> L.ByteString -> content -> Annex ()
|
|
||||||
append jl ru f oldc toappend = do
|
|
||||||
journalChanged
|
journalChanged
|
||||||
appendJournalFile jl ru f oldc toappend
|
appendJournalFile jl appendable toappend
|
||||||
fastDebug "Annex.Branch" ("append " ++ fromRawFilePath f)
|
fastDebug "Annex.Branch" ("append " ++ fromRawFilePath f)
|
||||||
invalidateCache
|
invalidateCache
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
- git-annex branch. Among other things, it ensures that if git-annex is
|
- git-annex branch. Among other things, it ensures that if git-annex is
|
||||||
- interrupted, its recorded data is not lost.
|
- interrupted, its recorded data is not lost.
|
||||||
-
|
-
|
||||||
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -20,6 +20,7 @@ import Annex.Perms
|
||||||
import Annex.Tmp
|
import Annex.Tmp
|
||||||
import Annex.LockFile
|
import Annex.LockFile
|
||||||
import Utility.Directory.Stream
|
import Utility.Directory.Stream
|
||||||
|
import qualified Utility.RawFilePath as R
|
||||||
|
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
@ -91,27 +92,30 @@ setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do
|
||||||
-- exists
|
-- exists
|
||||||
mv `catchIO` (const (createAnnexDirectory jd >> mv))
|
mv `catchIO` (const (createAnnexDirectory jd >> mv))
|
||||||
|
|
||||||
{- Appends content to a journal file.
|
newtype AppendableJournalFile = AppendableJournalFile (RawFilePath, RawFilePath)
|
||||||
-
|
|
||||||
- The oldcontent is whatever is in the git-annex branch.
|
{- If the journal file does not exist, it cannot be appended to, because
|
||||||
- When the journal file does not yet exist, the oldcontent
|
- that would overwrite whatever content the file has in the git-annex
|
||||||
- is first written to the journal file.
|
- branch. -}
|
||||||
-
|
checkCanAppendJournalFile :: JournalLocked -> RegardingUUID -> RawFilePath -> Annex (Maybe AppendableJournalFile)
|
||||||
- TODO: Unsafe! Does not append atomically. -}
|
checkCanAppendJournalFile _jl ru file = do
|
||||||
appendJournalFile :: Journalable content => JournalLocked -> RegardingUUID -> RawFilePath -> L.ByteString -> content -> Annex ()
|
|
||||||
appendJournalFile _jl ru file oldcontent toappend = do
|
|
||||||
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
|
jd <- fromRepo =<< ifM (regardingPrivateUUID ru)
|
||||||
( return gitAnnexPrivateJournalDir
|
( return gitAnnexPrivateJournalDir
|
||||||
, return gitAnnexJournalDir
|
, return gitAnnexJournalDir
|
||||||
)
|
)
|
||||||
let jfile = fromRawFilePath $ jd P.</> journalFile file
|
let jfile = jd P.</> journalFile file
|
||||||
let write = liftIO $ ifM (doesFileExist jfile)
|
ifM (liftIO $ R.doesPathExist jfile)
|
||||||
( withFile jfile AppendMode $ \h ->
|
( return (Just (AppendableJournalFile (jd, jfile)))
|
||||||
writeJournalHandle h toappend
|
, return Nothing
|
||||||
, withFile jfile WriteMode $ \h -> do
|
|
||||||
writeJournalHandle h oldcontent
|
|
||||||
writeJournalHandle h toappend
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
{- Appends content to an existing journal file.
|
||||||
|
-
|
||||||
|
- TODO: Unsafe! Does not append atomically. -}
|
||||||
|
appendJournalFile :: Journalable content => JournalLocked -> AppendableJournalFile -> content -> Annex ()
|
||||||
|
appendJournalFile _jl (AppendableJournalFile (jd, jfile)) content = do
|
||||||
|
let write = liftIO $ withFile (fromRawFilePath jfile) AppendMode $ \h ->
|
||||||
|
writeJournalHandle h content
|
||||||
write `catchIO` (const (createAnnexDirectory jd >> write))
|
write `catchIO` (const (createAnnexDirectory jd >> write))
|
||||||
|
|
||||||
data JournalledContent
|
data JournalledContent
|
||||||
|
|
|
@ -19,6 +19,8 @@ git-annex (10.20220625) UNRELEASED; urgency=medium
|
||||||
importtree=yes or exporttree=yes.
|
importtree=yes or exporttree=yes.
|
||||||
* Fix a reversion that prevented --batch commands (and the assistant)
|
* Fix a reversion that prevented --batch commands (and the assistant)
|
||||||
from noticing data written to the journal by other commands.
|
from noticing data written to the journal by other commands.
|
||||||
|
* Added annex.alwayscompact setting which can be unset to speed up
|
||||||
|
writes to the git-annex branch in some cases.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Tue, 28 Jun 2022 14:49:17 -0400
|
-- Joey Hess <id@joeyh.name> Tue, 28 Jun 2022 14:49:17 -0400
|
||||||
|
|
||||||
|
|
|
@ -57,7 +57,11 @@ getUrlsWithPrefix key prefix = filter (prefix `isPrefixOf`)
|
||||||
|
|
||||||
setUrlPresent :: Key -> URLString -> Annex ()
|
setUrlPresent :: Key -> URLString -> Annex ()
|
||||||
setUrlPresent key url = do
|
setUrlPresent key url = do
|
||||||
us <- getUrls key
|
-- Avoid reading the url log when not compacting, for speed.
|
||||||
|
us <- ifM (annexAlwaysCompact <$> Annex.getGitConfig)
|
||||||
|
( getUrls key
|
||||||
|
, pure mempty
|
||||||
|
)
|
||||||
unless (url `elem` us) $ do
|
unless (url `elem` us) $ do
|
||||||
config <- Annex.getGitConfig
|
config <- Annex.getGitConfig
|
||||||
addLog (Annex.Branch.RegardingUUID []) (urlLogFile config key)
|
addLog (Annex.Branch.RegardingUUID []) (urlLogFile config key)
|
||||||
|
|
|
@ -82,6 +82,7 @@ data GitConfig = GitConfig
|
||||||
, annexBloomAccuracy :: Maybe Int
|
, annexBloomAccuracy :: Maybe Int
|
||||||
, annexSshCaching :: Maybe Bool
|
, annexSshCaching :: Maybe Bool
|
||||||
, annexAlwaysCommit :: Bool
|
, annexAlwaysCommit :: Bool
|
||||||
|
, annexAlwaysCompact :: Bool
|
||||||
, annexCommitMessage :: Maybe String
|
, annexCommitMessage :: Maybe String
|
||||||
, annexMergeAnnexBranches :: Bool
|
, annexMergeAnnexBranches :: Bool
|
||||||
, annexDelayAdd :: Maybe Int
|
, annexDelayAdd :: Maybe Int
|
||||||
|
@ -164,6 +165,7 @@ extractGitConfig configsource r = GitConfig
|
||||||
, annexBloomAccuracy = getmayberead (annexConfig "bloomaccuracy")
|
, annexBloomAccuracy = getmayberead (annexConfig "bloomaccuracy")
|
||||||
, annexSshCaching = getmaybebool (annexConfig "sshcaching")
|
, annexSshCaching = getmaybebool (annexConfig "sshcaching")
|
||||||
, annexAlwaysCommit = getbool (annexConfig "alwayscommit") True
|
, annexAlwaysCommit = getbool (annexConfig "alwayscommit") True
|
||||||
|
, annexAlwaysCompact = getbool (annexConfig "alwayscompact") True
|
||||||
, annexCommitMessage = getmaybe (annexConfig "commitmessage")
|
, annexCommitMessage = getmaybe (annexConfig "commitmessage")
|
||||||
, annexMergeAnnexBranches = getbool (annexConfig "merge-annex-branches") True
|
, annexMergeAnnexBranches = getbool (annexConfig "merge-annex-branches") True
|
||||||
, annexDelayAdd = getmayberead (annexConfig "delayadd")
|
, annexDelayAdd = getmayberead (annexConfig "delayadd")
|
||||||
|
|
|
@ -1038,6 +1038,19 @@ repository, using [[git-annex-config]]. See its man page for a list.)
|
||||||
This works well in combination with annex.alwayscommit=false,
|
This works well in combination with annex.alwayscommit=false,
|
||||||
to gather up a set of changes and commit them with a message you specify.
|
to gather up a set of changes and commit them with a message you specify.
|
||||||
|
|
||||||
|
* `annex.alwayscompact`
|
||||||
|
|
||||||
|
By default, git-annex compacts data it records in the git-annex branch.
|
||||||
|
Setting this to false avoids doing that compaction in some cases, which
|
||||||
|
can speed up operations that populate the git-annex branch with a lot
|
||||||
|
of data. However, when used with operations that overwrite old values in
|
||||||
|
the git-annex branch, that may cause the git-annex branch to use more disk
|
||||||
|
space, or slow down reading data from it.
|
||||||
|
|
||||||
|
An example of a command that can be sped up by using
|
||||||
|
`-c annex.alwayscompact=false` is `git-annex registerurl --batch`,
|
||||||
|
when adding a large number of urls to the same key.
|
||||||
|
|
||||||
* `annex.allowsign`
|
* `annex.allowsign`
|
||||||
|
|
||||||
By default git-annex avoids gpg signing commits that it makes when
|
By default git-annex avoids gpg signing commits that it makes when
|
||||||
|
|
Loading…
Reference in a new issue