Merge branch 'master' into proxy

This commit is contained in:
Joey Hess 2024-06-12 09:49:30 -04:00
commit 178da0dc99
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 194 additions and 3 deletions

View file

@ -0,0 +1,75 @@
### Please describe the problem.
Immediately after starting the git-annex web application, the git-annex process uses all available memory. After some time, the Linux OOM killer kills git-annex.
### What steps will reproduce the problem?
1. create a git-annex repo
2. start git-annex webapp
### What version of git-annex are you using? On what operating system?
ii git-annex 10.20240430-1 amd64
debian trixie/sid
### Please provide any additional information below.
[[!format sh """
# If you can, paste a complete transcript of the problem occurring here.
# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log
syslog
root@hwarang:/var/log# grep oom *
grep: cups: Ist ein Verzeichnis
grep: gdm3: Ist ein Verzeichnis
kern.log:2024-06-10T15:44:54.288491+02:00 hwarang kernel: systemd invoked oom-killer: gfp_mask=0x140cca(GFP_HIGHUSER_MOVABLE|__GFP_COMP), order=0, oom_score_adj=0
kern.log:2024-06-10T15:44:54.296307+02:00 hwarang kernel: oom_kill_process+0xfa/0x200
kern.log:2024-06-10T15:44:54.305858+02:00 hwarang kernel: [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name
kern.log:2024-06-10T15:44:54.317261+02:00 hwarang kernel: oom-kill:constraint=CONSTRAINT_NONE,nodemask=(null),cpuset=/,mems_allowed=0,global_oom,task_memcg=/user.slice/user-1000.slice/user@1000.service/app.slice/app-gnome-git\x2dannex-7364.scope,task=git-annex,pid=7388,uid=1000
kern.log:2024-06-10T15:44:54.317262+02:00 hwarang kernel: Out of memory: Killed process 7388 (git-annex) total-vm:83979364kB, anon-rss:30780216kB, file-rss:1792kB, shmem-rss:0kB, UID:1000 pgtables:122696kB oom_score_adj:100
kern.log:2024-06-10T15:44:56.485580+02:00 hwarang kernel: oom_reaper: reaped process 7388 (git-annex), now anon-rss:240kB, file-rss:336kB, shmem-rss:0kB
kern.log:2024-06-10T15:53:38.057774+02:00 hwarang kernel: teamviewerd invoked oom-killer: gfp_mask=0x140cca(GFP_HIGHUSER_MOVABLE|__GFP_COMP), order=0, oom_score_adj=0
kern.log:2024-06-10T15:53:38.059980+02:00 hwarang kernel: oom_kill_process+0xfa/0x200
kern.log:2024-06-10T15:53:38.062710+02:00 hwarang kernel: [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name
kern.log:2024-06-10T15:53:38.066977+02:00 hwarang kernel: oom-kill:constraint=CONSTRAINT_NONE,nodemask=(null),cpuset=/,mems_allowed=0,global_oom,task_memcg=/user.slice/user-1000.slice/user@1000.service/app.slice/app-gnome-git\x2dannex-7833.scope,task=git-annex,pid=7856,uid=1000
kern.log:2024-06-10T15:53:38.066978+02:00 hwarang kernel: Out of memory: Killed process 7856 (git-annex) total-vm:83979364kB, anon-rss:31243884kB, file-rss:1664kB, shmem-rss:0kB, UID:1000 pgtables:122900kB oom_score_adj:100
kern.log:2024-06-10T15:53:40.337624+02:00 hwarang kernel: oom_reaper: reaped process 7856 (git-annex), now anon-rss:540kB, file-rss:128kB, shmem-rss:0kB
grep: postgresql: Ist ein Verzeichnis
grep: private: Ist ein Verzeichnis
syslog:2024-06-10T15:44:54.288491+02:00 hwarang kernel: systemd invoked oom-killer: gfp_mask=0x140cca(GFP_HIGHUSER_MOVABLE|__GFP_COMP), order=0, oom_score_adj=0
syslog:2024-06-10T15:44:54.296307+02:00 hwarang kernel: oom_kill_process+0xfa/0x200
syslog:2024-06-10T15:44:54.305858+02:00 hwarang kernel: [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name
syslog:2024-06-10T15:44:54.317261+02:00 hwarang kernel: oom-kill:constraint=CONSTRAINT_NONE,nodemask=(null),cpuset=/,mems_allowed=0,global_oom,task_memcg=/user.slice/user-1000.slice/user@1000.service/app.slice/app-gnome-git\x2dannex-7364.scope,task=git-annex,pid=7388,uid=1000
syslog:2024-06-10T15:44:54.317262+02:00 hwarang kernel: Out of memory: Killed process 7388 (git-annex) total-vm:83979364kB, anon-rss:30780216kB, file-rss:1792kB, shmem-rss:0kB, UID:1000 pgtables:122696kB oom_score_adj:100
syslog:2024-06-10T15:44:56.485580+02:00 hwarang kernel: oom_reaper: reaped process 7388 (git-annex), now anon-rss:240kB, file-rss:336kB, shmem-rss:0kB
syslog:2024-06-10T15:44:56.489365+02:00 hwarang systemd[3185]: app-gnome-git\x2dannex-7364.scope: Failed with result 'oom-kill'.
syslog:2024-06-10T15:53:38.057774+02:00 hwarang kernel: teamviewerd invoked oom-killer: gfp_mask=0x140cca(GFP_HIGHUSER_MOVABLE|__GFP_COMP), order=0, oom_score_adj=0
syslog:2024-06-10T15:53:38.059980+02:00 hwarang kernel: oom_kill_process+0xfa/0x200
syslog:2024-06-10T15:53:38.062710+02:00 hwarang kernel: [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name
syslog:2024-06-10T15:53:38.066977+02:00 hwarang kernel: oom-kill:constraint=CONSTRAINT_NONE,nodemask=(null),cpuset=/,mems_allowed=0,global_oom,task_memcg=/user.slice/user-1000.slice/user@1000.service/app.slice/app-gnome-git\x2dannex-7833.scope,task=git-annex,pid=7856,uid=1000
syslog:2024-06-10T15:53:38.066978+02:00 hwarang kernel: Out of memory: Killed process 7856 (git-annex) total-vm:83979364kB, anon-rss:31243884kB, file-rss:1664kB, shmem-rss:0kB, UID:1000 pgtables:122900kB oom_score_adj:100
syslog:2024-06-10T15:53:40.337624+02:00 hwarang kernel: oom_reaper: reaped process 7856 (git-annex), now anon-rss:540kB, file-rss:128kB, shmem-rss:0kB
syslog:2024-06-10T15:53:40.365942+02:00 hwarang systemd[3185]: app-gnome-git\x2dannex-7833.scope: Failed with result 'oom-kill'.
grep: tomcat9: Ist ein Verzeichnis
End of syslog extract
daemon.log
(scanning...) (started...)
daemon.status
lastRunning:1718027123.570568257s
scanComplete:False
sanityCheckRunning:False
lastSanityCheck:
# End of transcript or log.
"""]]
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)

View file

@ -272,9 +272,9 @@ Could the proxy be in front of a special remote that uses exporttree=yes?
Some possible approaches:
* Proxy caches files until all the files in the configured
* Proxy caches files somewhere until all the files in the configured
annex-tracking-branch are available, then exports them all to the special
remote. Not ideal at all.
remote.
* Proxy exports each file to the special remote as it is received.
It records an incomplete tree export after each export.
Once all files in the configured annex-tracking-branch have been sent,
@ -288,9 +288,55 @@ The first two approaches need some way to communicate the
configured annex-tracking-branch over the P2P protocol. Or to communicate
the tree that it currently points to.
A proxy for a git repo does not proxy access to the git repo itself, so
`git push origin-foo master` actually pushes the ref to the proxy's own git
repo. Perhaps this points in a direction of how the proxy could learn what
tree to export to exporttree=yes remotes. But only vaguely since how would
it pick which of multiple branches to export?
Perhaps configure the annex-tracking-branch in the git-annex branch?
That might be generally useful when working with exporttree=yes remotes.
The first two approaches also have a complication when a key is sent to
the proxy that is not part of the configured annex-tracking-branch. What
does the proxy do with it?
does the proxy do with it? There seem to be three possibilities:
1. Reject the transfer of the key.
2. Send the key to another proxied remote that is not exporttree=yes
(and get it from there later if needed to finish populating an export)
3. Store the key locally. (Not desirable because proxy repos may be on
small disks as they don't usually need to hold any files.)
The third approach would mean the user needs to use `git-annex export --to`
in order to update proxied exporttree remotes, which gets in the way of the
other proxy workflows and requires them to know that the proxy has an
exporttree remote behind it.
Tentative design for exporttree=yes with proxies:
* Configure annex-tracking-branch for the proxy in the git-annex branch.
(For the proxy as a whole, or for specific exporttree=yes repos behind
it?)
* Then the user's workflow is simply: `git-annex push proxy`
* sync/push need to first push any updated annex-tracking-branch to the
proxy before sending content to it. (Currently sync only pushes at the
end.)
* If proxied remotes are all exporttree=yes, the proxy rejects any
transfers of a key that is not in the annex-tracking-branch that it
currently knows about. If there is any other proxied remote, the proxy
can direct such transfers to it.
* Upon receiving a new annex-tracking-branch or any transfer of a key
used in the current annex-tracking-branch, the proxy can update
the exporttree=yes remotes. This needs to happen incrementally,
e.g. upon receiving a key, just proxy it on to the exporttree=yes remote,
and update the export database. Once all keys are received, update
the git-annex branch to indicate a new tree has been exported.
* Upon receiving a git push of the annex-tracking-branch, a proxy might
be able to get all the changed objects from non-exporttree=yes proxied
remotes that contain them. If so it can update the exporttree=yes
remote automatically and inexpensively. At the same time, a
`git-annex push` will be attempting to send those same objects.
So somehow the proxy will need to manage this situation.
## possible enhancement: indirect uploads

View file

@ -0,0 +1,70 @@
I have this error.
$ git annex --debug enableremote server
[2024-06-11 08:16:48.356839038] (Utility.Process) process [17496] read: git ["--git-dir=../.git","--work-tree=..","--literal-pa
thspecs","-c","annex.debug=true","show-ref","git-annex"]
[2024-06-11 08:16:48.377496927] (Utility.Process) process [17496] done ExitSuccess
[2024-06-11 08:16:48.377922696] (Utility.Process) process [17501] read: git ["--git-dir=../.git","--work-tree=..","--literal-pa
thspecs","-c","annex.debug=true","show-ref","--hash","refs/heads/git-annex"]
[2024-06-11 08:16:48.397529156] (Utility.Process) process [17501] done ExitSuccess
[2024-06-11 08:16:48.399718045] (Utility.Process) process [17507] chat: git ["--git-dir=../.git","--work-tree=..","--literal-pa
thspecs","-c","annex.debug=true","cat-file","--batch"]
enableremote (normal) server [2024-06-11 08:16:48.415631528] (Utility.Process) process [17509] call: git ["--git-dir=../.git","-
-work-tree=..","--literal-pathspecs","-c","annex.debug=true","config","remote.server.annex-ignore","false"]
[2024-06-11 08:16:48.425103598] (Utility.Process) process [17509] done ExitSuccess
[2024-06-11 08:16:48.425415775] (Utility.Process) process [17510] read: git ["config","--null","--list"] in ".."
[2024-06-11 08:16:48.433272117] (Git.Config) git config read: [("",[""]),("annex.backend",["SHA256"]),("annex.tune.objecthashlo
wer",["true"]),("annex.uuid",["b1510484-6489-4351-9876-993041f22cb3"]),("annex.version",["10"]),("core.bare",["false"]),("core.
filemode",["true"]),("core.logallrefupdates",["true"]),("core.repositoryformatversion",["0"]),("filter.annex.clean",["git-annex
smudge --clean -- %f"]),("filter.annex.process",["git-annex filter-process"]),("filter.annex.smudge",["git-annex smudge -- %f"
]),("init.defaultbranch",["master"]),("remote.server.annex-ignore",["false"]),("remote.server.fetch",["+refs/heads/*:refs/remotes
/server/*"]),("remote.server.url",["ssh://server.local:/mnt/user/data"]),("safe.directory",["/mnt/user/data"]),("user.email",["roo
t","root@delta.local"]),("user.name",["root","root"])]
[2024-06-11 08:16:48.433479676] (Utility.Process) process [17510] done ExitSuccess
[2024-06-11 08:16:48.435182799] (Utility.Process) process [17511] read: ssh ["server.local","-S","../.git/annex/ssh/server.local"
,"-o","ControlMaster=auto","-o","ControlPersist=yes","-n","-T","git-annex-shell 'configlist' '/mnt/user/data' '--debug'"]
[2024-06-11 08:16:48.619602925] (Utility.Process) process [17511] done ExitFailure 255
Unable to parse git config from server
[2024-06-11 08:16:48.619932626] (Utility.Process) process [17516] call: git ["--git-dir=../.git","--work-tree=..","--literal-pa
thspecs","-c","annex.debug=true","fetch","--quiet","server"]
[2024-06-11 08:16:49.018922661] (Utility.Process) process [17516] done ExitSuccess
Remote server does not have git-annex installed; setting annex-ignore
This could be a problem with the git-annex installation on the remote. Please make sure that git-annex-shell is available in
PATH when you ssh into the remote. Once you have fixed the git-annex installation, run: git annex enableremote server
[2024-06-11 08:16:49.019278841] (Utility.Process) process [17520] call: git ["--git-dir=../.git","--work-tree=..","--literal-pa
thspecs","-c","annex.debug=true","config","remote.server.annex-ignore","true"]
[2024-06-11 08:16:49.028550677] (Utility.Process) process [17520] done ExitSuccess
[2024-06-11 08:16:49.028909964] (Utility.Process) process [17521] read: git ["config","--null","--list"] in ".."
[2024-06-11 08:16:49.036666793] (Git.Config) git config read: [("",[""]),("annex.backend",["SHA256"]),("annex.tune.objecthashlo
wer",["true"]),("annex.uuid",["b1510484-6489-4351-9876-993041f22cb3"]),("annex.version",["10"]),("core.bare",["false"]),("core.
filemode",["true"]),("core.logallrefupdates",["true"]),("core.repositoryformatversion",["0"]),("filter.annex.clean",["git-annex
smudge --clean -- %f"]),("filter.annex.process",["git-annex filter-process"]),("filter.annex.smudge",["git-annex smudge -- %f"
]),("init.defaultbranch",["master"]),("remote.server.annex-ignore",["true"]),("remote.server.fetch",["+refs/heads/*:refs/remotes/
server/*"]),("remote.server.url",["ssh://server.local:/mnt/user/data"]),("safe.directory",["/mnt/user/data"]),("user.email",["root
","root@delta.local"]),("user.name",["root","root"])]
[2024-06-11 08:16:49.036812734] (Utility.Process) process [17521] done ExitSuccess
failed
[2024-06-11 08:16:49.03837688] (Utility.Process) process [17522] read: ssh ["-O","stop","-S","server.local","-o","ControlMaster=
auto","-o","ControlPersist=yes","localhost"] in "../.git/annex/ssh/"
[2024-06-11 08:16:49.042787993] (Utility.Process) process [17522] done ExitFailure 255
[2024-06-11 08:16:49.043822645] (Utility.Process) process [17507] done ExitSuccess
enableremote: 1 failed
I can reproduce it by calling `ssh` myself like this.
ssh server.local -S ../.git/annex/ssh/server.local -o ControlMaster=auto -o ControlPersist=yes -n -T git-annex-shell 'configlist' '/mnt/user/data' '--debug'
Control socket connect(../.git/annex/ssh/server.local): Connection refused
Failed to connect to new control master
If I change the location of the socket file to use my home folder then it works.
ssh server.local -S $HOME/server.local -o ControlMaster=auto -o ControlPersist=yes -n -T git-annex-shell 'configlist' '/mnt/user/data' '--debug'
annex.uuid=23568973-b0e8-493f-9404-cce91346a818
core.gcrypt-id=
Why isn’t enableremote working?
Thanks!