diff --git a/CmdLine/GitRemoteAnnex.hs b/CmdLine/GitRemoteAnnex.hs index e6cdbc50c9..2667e92f8c 100644 --- a/CmdLine/GitRemoteAnnex.hs +++ b/CmdLine/GitRemoteAnnex.hs @@ -273,6 +273,10 @@ fullPush :: State -> Remote -> [Ref] -> Annex (Bool, State) fullPush st rmt refs = guardPush st $ do oldmanifest <- maybe (downloadManifestWhenPresent rmt) pure (manifestCache st) + fullPush' oldmanifest st rmt refs + +fullPush' :: Manifest -> State -> Remote -> [Ref] -> Annex (Bool, State) +fullPush' oldmanifest st rmt refs = do let bs = map Git.Bundle.fullBundleSpec refs (bundlekey, uploadbundle) <- generateGitBundle rmt bs oldmanifest let manifest = mkManifest [bundlekey] $ @@ -297,14 +301,19 @@ guardPush st a = catchNonAsync a $ \ex -> do incrementalPush :: State -> Remote -> M.Map Ref Sha -> M.Map Ref Sha -> Annex (Bool, State) incrementalPush st rmt oldtrackingrefs newtrackingrefs = guardPush st $ do oldmanifest <- maybe (downloadManifestWhenPresent rmt) pure (manifestCache st) - bs <- calc [] (M.toList newtrackingrefs) - (bundlekey, uploadbundle) <- generateGitBundle rmt bs oldmanifest - let manifest = oldmanifest <> mkManifest [bundlekey] mempty - manifest' <- startPush rmt manifest - uploadbundle - uploadManifest rmt manifest' - return (True, st { manifestCache = Nothing }) + if length (inManifest oldmanifest) + 1 > remoteAnnexMaxGitBundles (Remote.gitconfig rmt) + then fullPush' oldmanifest st rmt (M.keys newtrackingrefs) + else go oldmanifest where + go oldmanifest = do + bs <- calc [] (M.toList newtrackingrefs) + (bundlekey, uploadbundle) <- generateGitBundle rmt bs oldmanifest + let manifest = oldmanifest <> mkManifest [bundlekey] mempty + manifest' <- startPush rmt manifest + uploadbundle + uploadManifest rmt manifest' + return (True, st { manifestCache = Nothing }) + calc c [] = return (reverse c) calc c ((ref, sha):refs) = case M.lookup ref oldtrackingrefs of Just oldsha diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index e1090a1121..42f1811997 100644 --- 
a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -373,6 +373,7 @@ data RemoteGitConfig = RemoteGitConfig , remoteAnnexBwLimitDownload :: Maybe BwRate , remoteAnnexAllowUnverifiedDownloads :: Bool , remoteAnnexConfigUUID :: Maybe UUID + , remoteAnnexMaxGitBundles :: Int , remoteAnnexAllowEncryptedGitRepo :: Bool , remoteUrl :: Maybe String @@ -453,6 +454,8 @@ extractRemoteGitConfig r remotename = do readBwRatePerSecond =<< getmaybe "bwlimit-download" , remoteAnnexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $ getmaybe ("security-allow-unverified-downloads") + , remoteAnnexMaxGitBundles = + fromMaybe 100 (getmayberead "max-git-bundles") , remoteAnnexConfigUUID = toUUID <$> getmaybe "config-uuid" , remoteAnnexShell = getmaybe "shell" , remoteAnnexSshOptions = getoptions "ssh-options" diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 65750ab917..19570dcfb8 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1648,6 +1648,17 @@ Remotes are configured using these settings in `.git/config`. remotes, and is set when using [[git-annex-initremote]](1) with the `--private` option. +* `remote.<name>.annex-max-git-bundles`, `annex.max-git-bundles` + + When using [[git-remote-annex]] to store a git repository in a special + remote, this configures how many separate git bundle objects to store + in the special remote before re-uploading a single git bundle that contains + the entire git repository. + + The default is 100, which aims to avoid needing to re-upload often, + while preventing a clone or fetch from needing to download too many objects. + Set to 0 to disable re-uploading. 
+ * `remote.<name>.annex-allow-encrypted-gitrepo` Setting this to true allows using [[git-remote-annex]] to push the git diff --git a/doc/git-remote-annex.mdwn b/doc/git-remote-annex.mdwn index 2d08e33b0a..9c218d96f3 100644 --- a/doc/git-remote-annex.mdwn +++ b/doc/git-remote-annex.mdwn @@ -36,29 +36,11 @@ When using the shorthand "annex::" url, the full url will be displayed each time you git pull or push, when it's possible for git-annex to determine it. -When a special remote needs some additional credentials to be provided, -they are not included in the URL, and need to be provided when cloning from -the special remote. That is typically done by setting environment -variables. Some special remotes may also need environment variables to be -set when pulling or pushing. - -The git repository is stored in the special remote using special annex objects -with names starting with "GITMANIFEST" and "GITBUNDLE". For details about -how the git repository is stored, see - - -Pushes to a special remote are usually done incrementally. However, -sometimes the whole git repository (but not the annex) needs to be -re-uploaded. That is done when force pushing a ref, or deleting a -ref from the remote. - -The special remote accumulates one GITBUNDLE object per push, and old -objects are usually not deleted. This means that refs pushed to the special -remote can still be accessed even after deleting or overwriting them. -A push that deletes every ref from the special remote does delete all -the accumulated GITBUNDLE objects. But of course, making such a push -means that someone clones from the special remote at that point in time -will see an empty remote. +When a special remote needs some credentials to be used, they are not +included in the URL, and will need to be provided when cloning from the +special remote. That is typically done by setting environment variables. +Some special remotes may also need environment variables to be set when +pulling or pushing. 
Like any git repository, a git repository stored on a special remote can have conflicting things pushed to it from different places. This mostly @@ -69,6 +51,25 @@ to be overwritten by the other one. In this situation, the overwritten push will appear to have succeeded, but pulling later will show the true situation. +The git repository is stored in the special remote using special annex objects +with names starting with "GITMANIFEST" and "GITBUNDLE". For details, see: +<https://git-annex.branchable.com/internals/git-remote-annex/> + +Pushes to a special remote are usually done incrementally. However, +sometimes the whole git repository (but not the annex) needs to be +re-uploaded. That is done when force pushing a ref, or deleting a +ref from the remote. It's also done when too many git bundles +accumulate in the special remote, as configured by the +`remote.<name>.annex-max-git-bundles` git config. + +Note that a re-upload of the repository does not delete old GITBUNDLE +objects from it. This means that refs pushed to the special +remote can still be accessed even after deleting or overwriting them. +A push that deletes every ref from the special remote will delete all +the accumulated GITBUNDLE objects. But of course, making such a push +means that someone who clones from the special remote at that point in time +will see an empty remote. + # SEE ALSO gitremote-helpers(1)