Merge branch 'master' into streamproxy

This commit is contained in:
Joey Hess 2024-10-18 10:18:59 -04:00
commit 8c7047fc77
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
38 changed files with 637 additions and 24 deletions

View file

@ -4,8 +4,13 @@ git-annex (10.20240928) UNRELEASED; urgency=medium
* Added GETORDERED request to external special remote protocol. * Added GETORDERED request to external special remote protocol.
When the external special remote responds with ORDERED, it can stream When the external special remote responds with ORDERED, it can stream
through a proxy. through a proxy.
* p2phttp: Support serving unauthenticated users while requesting
authentication for operations that need it. Eg, --unauth-readonly
can be combined with --authenv.
* Allow enabling the servant build flag with older versions of stm,
allowing building with ghc 9.0.2.
-- Joey Hess <id@joeyh.name> Tue, 15 Oct 2024 12:12:18 -0400 -- Joey Hess <id@joeyh.name> Thu, 17 Oct 2024 11:02:17 -0400
git-annex (10.20240927) upstream; urgency=medium git-annex (10.20240927) upstream; urgency=medium

View file

@ -128,10 +128,15 @@ seek o = getAnnexWorkerPool $ \workerpool ->
mkGetServerMode :: M.Map Auth P2P.ServerMode -> Options -> GetServerMode mkGetServerMode :: M.Map Auth P2P.ServerMode -> Options -> GetServerMode
mkGetServerMode _ o _ Nothing mkGetServerMode _ o _ Nothing
| wideOpenOption o = Just P2P.ServeReadWrite | wideOpenOption o =
| unauthAppendOnlyOption o = Just P2P.ServeAppendOnly ServerMode P2P.ServeReadWrite False
| unauthReadOnlyOption o = Just P2P.ServeReadOnly | unauthAppendOnlyOption o =
| otherwise = Nothing ServerMode P2P.ServeAppendOnly canauth
| unauthReadOnlyOption o =
ServerMode P2P.ServeReadOnly canauth
| otherwise = CannotServeRequests
where
canauth = authEnvOption o || authEnvHttpOption o
mkGetServerMode authenv o issecure (Just auth) = mkGetServerMode authenv o issecure (Just auth) =
case (issecure, authEnvOption o, authEnvHttpOption o) of case (issecure, authEnvOption o, authEnvHttpOption o) of
(Secure, True, _) -> checkauth (Secure, True, _) -> checkauth
@ -139,9 +144,10 @@ mkGetServerMode authenv o issecure (Just auth) =
_ -> noauth _ -> noauth
where where
checkauth = case M.lookup auth authenv of checkauth = case M.lookup auth authenv of
Just servermode -> Just servermode Just servermode -> ServerMode servermode False
Nothing -> noauth Nothing -> noauth
noauth = mkGetServerMode authenv o issecure Nothing noauth = mkGetServerMode authenv noautho issecure Nothing
noautho = o { authEnvOption = False, authEnvHttpOption = False }
getAuthEnv :: IO (M.Map Auth P2P.ServerMode) getAuthEnv :: IO (M.Map Auth P2P.ServerMode)
getAuthEnv = do getAuthEnv = do

View file

@ -36,6 +36,7 @@ import P2P.Http.Url
import Annex.Concurrent import Annex.Concurrent
import Utility.Url (BasicAuth(..)) import Utility.Url (BasicAuth(..))
import Utility.HumanTime import Utility.HumanTime
import Utility.STM
import qualified Git.Credential as Git import qualified Git.Credential as Git
import Servant hiding (BasicAuthData(..)) import Servant hiding (BasicAuthData(..))
@ -46,7 +47,6 @@ import Network.HTTP.Client
import qualified Data.ByteString as B import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy.Internal as LI import qualified Data.ByteString.Lazy.Internal as LI
import qualified Data.Map as M import qualified Data.Map as M
import Control.Concurrent.STM
import Control.Concurrent.Async import Control.Concurrent.Async
import Control.Concurrent import Control.Concurrent
import System.IO.Unsafe import System.IO.Unsafe

View file

@ -32,13 +32,13 @@ import Annex.WorkerPool
import Types.WorkerPool import Types.WorkerPool
import Types.Direction import Types.Direction
import Utility.Metered import Utility.Metered
import Utility.STM
import Servant import Servant
import qualified Servant.Types.SourceT as S import qualified Servant.Types.SourceT as S
import qualified Data.ByteString as B import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as LI import qualified Data.ByteString.Lazy.Internal as LI
import Control.Concurrent.STM
import Control.Concurrent.Async import Control.Concurrent.Async
import Control.Concurrent import Control.Concurrent
import System.IO.Unsafe import System.IO.Unsafe

View file

@ -35,12 +35,12 @@ import Annex.Proxy
import Annex.Cluster import Annex.Cluster
import qualified P2P.Proxy as Proxy import qualified P2P.Proxy as Proxy
import qualified Types.Remote as Remote import qualified Types.Remote as Remote
import Utility.STM
import Servant import Servant
import qualified Data.Map.Strict as M import qualified Data.Map.Strict as M
import qualified Data.Set as S import qualified Data.Set as S
import Control.Concurrent.Async import Control.Concurrent.Async
import Control.Concurrent.STM
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
data P2PHttpServerState = P2PHttpServerState data P2PHttpServerState = P2PHttpServerState
@ -52,8 +52,14 @@ data P2PHttpServerState = P2PHttpServerState
type AnnexWorkerPool = TMVar (WorkerPool (Annex.AnnexState, Annex.AnnexRead)) type AnnexWorkerPool = TMVar (WorkerPool (Annex.AnnexState, Annex.AnnexRead))
-- Nothing when the server is not allowed to serve any requests. type GetServerMode = IsSecure -> Maybe Auth -> ServerMode
type GetServerMode = IsSecure -> Maybe Auth -> Maybe P2P.ServerMode
data ServerMode
= ServerMode
{ serverMode :: P2P.ServerMode
, authenticationAllowed :: Bool
}
| CannotServeRequests
mkP2PHttpServerState :: AcquireP2PConnection -> AnnexWorkerPool -> GetServerMode -> IO P2PHttpServerState mkP2PHttpServerState :: AcquireP2PConnection -> AnnexWorkerPool -> GetServerMode -> IO P2PHttpServerState
mkP2PHttpServerState acquireconn annexworkerpool getservermode = P2PHttpServerState mkP2PHttpServerState acquireconn annexworkerpool getservermode = P2PHttpServerState
@ -143,13 +149,23 @@ checkAuthActionClass
-> (P2P.ServerMode -> Handler a) -> (P2P.ServerMode -> Handler a)
-> Handler a -> Handler a
checkAuthActionClass st sec auth actionclass go = checkAuthActionClass st sec auth actionclass go =
case (getServerMode st sec auth, actionclass) of case (sm, actionclass) of
(Just P2P.ServeReadWrite, _) -> go P2P.ServeReadWrite (ServerMode { serverMode = P2P.ServeReadWrite }, _) ->
(Just P2P.ServeAppendOnly, RemoveAction) -> throwError err403 go P2P.ServeReadWrite
(Just P2P.ServeAppendOnly, _) -> go P2P.ServeAppendOnly (ServerMode { serverMode = P2P.ServeAppendOnly }, RemoveAction) ->
(Just P2P.ServeReadOnly, ReadAction) -> go P2P.ServeReadOnly throwError $ forbiddenWithoutAuth sm
(Just P2P.ServeReadOnly, _) -> throwError err403 (ServerMode { serverMode = P2P.ServeAppendOnly }, _) ->
(Nothing, _) -> throwError basicAuthRequired go P2P.ServeAppendOnly
(ServerMode { serverMode = P2P.ServeReadOnly }, ReadAction) ->
go P2P.ServeReadOnly
(ServerMode { serverMode = P2P.ServeReadOnly }, _) ->
throwError $ forbiddenWithoutAuth sm
(CannotServeRequests, _) -> throwError basicAuthRequired
where
sm = getServerMode st sec auth
forbiddenAction :: ServerError
forbiddenAction = err403
basicAuthRequired :: ServerError basicAuthRequired :: ServerError
basicAuthRequired = err401 { errHeaders = [(h, v)] } basicAuthRequired = err401 { errHeaders = [(h, v)] }
@ -157,6 +173,11 @@ basicAuthRequired = err401 { errHeaders = [(h, v)] }
h = "WWW-Authenticate" h = "WWW-Authenticate"
v = "Basic realm=\"git-annex\", charset=\"UTF-8\"" v = "Basic realm=\"git-annex\", charset=\"UTF-8\""
forbiddenWithoutAuth :: ServerMode -> ServerError
forbiddenWithoutAuth sm
| authenticationAllowed sm = basicAuthRequired
| otherwise = forbiddenAction
data ConnectionParams = ConnectionParams data ConnectionParams = ConnectionParams
{ connectionProtocolVersion :: P2P.ProtocolVersion { connectionProtocolVersion :: P2P.ProtocolVersion
, connectionServerUUID :: UUID , connectionServerUUID :: UUID

View file

@ -5,8 +5,12 @@
- License: BSD-2-clause - License: BSD-2-clause
-} -}
{-# LANGUAGE CPP #-}
module Utility.OpenFile where module Utility.OpenFile where
#ifndef mingw32_HOST_OS
import System.IO import System.IO
import System.Posix.IO import System.Posix.IO
import GHC.IO.FD import GHC.IO.FD
@ -30,3 +34,5 @@ openFileBeingWritten f = do
fd <- openFdWithMode f ReadOnly Nothing defaultFileFlags fd <- openFdWithMode f ReadOnly Nothing defaultFileFlags
(fd', fdtype) <- mkFD (fromIntegral fd) ReadMode (Just (Stream, 0, 0)) False False (fd', fdtype) <- mkFD (fromIntegral fd) ReadMode (Just (Stream, 0, 0)) False False
mkHandleFromFD fd' fdtype (fromRawFilePath f) ReadMode False Nothing mkHandleFromFD fd' fdtype (fromRawFilePath f) ReadMode False Nothing
#endif

23
Utility/STM.hs Normal file
View file

@ -0,0 +1,23 @@
{- support for old versions of the stm package
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
{-# LANGUAGE CPP #-}
{-# OPTIONS_GHC -fno-warn-tabs #-}
module Utility.STM (
module Control.Concurrent.STM,
#if ! MIN_VERSION_stm(2,5,1)
writeTMVar
#endif
) where
import Control.Concurrent.STM
#if ! MIN_VERSION_stm(2,5,1)
-- Stand-in for the writeTMVar that stm 2.5.1 and newer export.
-- Any value currently held by the TMVar is discarded, and the new value
-- is stored in its place. Both steps run in the same STM transaction,
-- so the TMVar is always empty by the time putTMVar runs.
writeTMVar :: TMVar t -> t -> STM ()
writeTMVar var val = do
	_ <- tryTakeTMVar var
	putTMVar var val
#endif

View file

@ -54,7 +54,7 @@ in `.gitattributes`:
`BLAKE2SP224E`, `BLAKE2SP256E` `BLAKE2SP224E`, `BLAKE2SP256E`
-- Fast [Blake2 hash](https://blake2.net/) variants optimised for -- Fast [Blake2 hash](https://blake2.net/) variants optimised for
8-way CPUs. 8-way CPUs.
`VURL` -- This is like an `URL` (see below) but the content can * `VURL` -- This is like an `URL` (see below) but the content can
be verified with a cryptographically secure checksum that is be verified with a cryptographically secure checksum that is
recorded in the git-annex branch. It's generated when using recorded in the git-annex branch. It's generated when using
eg `git-annex addurl --fast --verifiable`. eg `git-annex addurl --fast --verifiable`.

View file

@ -0,0 +1,54 @@
### Please describe the problem.
We are running 10.20240831-1~ndall+1 as assistant
```
reprostim@reproiner:/data/reprostim$ ps auxw | grep assist
reprost+ 1102847 2.1 0.3 1074496300 122428 ? Ssl Sep05 1225:13 /usr/lib/git-annex.linux/exe/git-annex --library-path /usr/lib/git-annex.linux//lib/x86_64-linux-gnu:/usr/lib/git-annex.linux//lib/x86_64-linux-gnu: /usr/lib/git-annex.linux/shimmed/git-annex/git-annex assistant
```
and our underlying script produces a bunch of files online and later renames them, i.e. we are producing
```
reprostim@reproiner:/data/reprostim$ ls -l Videos/2024/10/2024.10.15-14.02.40.327--.mkv*
-rw-r--r-- 1 reprostim reprostim 165285526 Oct 15 14:12 Videos/2024/10/2024.10.15-14.02.40.327--.mkv
-rw-r--r-- 2 reprostim reprostim 0 Oct 15 14:02 Videos/2024/10/2024.10.15-14.02.40.327--.mkv.duct_info.json
-rw-r--r-- 11 reprostim reprostim 8218 Oct 15 14:11 Videos/2024/10/2024.10.15-14.02.40.327--.mkv.duct_usage.json
-rw-r--r-- 1 reprostim reprostim 7036928 Oct 15 14:12 Videos/2024/10/2024.10.15-14.02.40.327--.mkv.log
```
and whenever done, renaming to something like
```
reprostim@reproiner:/data/reprostim$ ls -l Videos/2024/10/2024.10.15-14.02.27.765--2024.10.15-14.02.34.175.mkv*
-rw-r--r-- 2 reprostim reprostim 71 Oct 15 14:02 Videos/2024/10/2024.10.15-14.02.27.765--2024.10.15-14.02.34.175.mkv
-rw-r--r-- 3 reprostim reprostim 1841 Oct 15 14:02 Videos/2024/10/2024.10.15-14.02.27.765--2024.10.15-14.02.34.175.mkv.duct_info.json
-rw-r--r-- 3 reprostim reprostim 805 Oct 15 14:02 Videos/2024/10/2024.10.15-14.02.27.765--2024.10.15-14.02.34.175.mkv.duct_usage.json
-rw-r--r-- 2 reprostim reprostim 69 Oct 15 14:02 Videos/2024/10/2024.10.15-14.02.27.765--2024.10.15-14.02.34.175.mkv.log
```
but the problem is that git-annex does not add **some** of the `.duct_*.json` files to git, i.e. it adds them sometimes but not always:
```
reprostim@reproiner:/data/reprostim$ git ls-tree -r HEAD | grep '2024.10.15-.*\.duct_.*\.json'
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 Videos/2024/10/2024.10.15-14.02.40.327--.mkv.duct_info.json
100644 blob efcd26e5fe702855b5fe583fc846fd21a27e0acc Videos/2024/10/2024.10.15-14.02.40.327--.mkv.duct_usage.json
reprostim@reproiner:/data/reprostim$ ls -ld Videos/2024/10/2024.10.15-.*\.duct_.*\.json
ls: cannot access 'Videos/2024/10/2024.10.15-.*.duct_.*.json': No such file or directory
reprostim@reproiner:/data/reprostim$ ls -ld Videos/2024/10/2024.10.15-*.duct_*.json
-rw-r--r-- 3 reprostim reprostim 1865 Oct 15 08:44 Videos/2024/10/2024.10.15-08.43.47.149--2024.10.15-08.44.13.219.mkv.duct_info.json
-rw-r--r-- 3 reprostim reprostim 1620 Oct 15 08:44 Videos/2024/10/2024.10.15-08.43.47.149--2024.10.15-08.44.13.219.mkv.duct_usage.json
-rw-r--r-- 3 reprostim reprostim 1874 Oct 15 08:47 Videos/2024/10/2024.10.15-08.46.22.715--2024.10.15-08.47.13.706.mkv.duct_info.json
-rw-r--r-- 3 reprostim reprostim 1623 Oct 15 08:47 Videos/2024/10/2024.10.15-08.46.22.715--2024.10.15-08.47.13.706.mkv.duct_usage.json
-rw-r--r-- 3 reprostim reprostim 1839 Oct 15 08:49 Videos/2024/10/2024.10.15-08.49.44.705--2024.10.15-08.49.51.117.mkv.duct_info.json
-rw-r--r-- 3 reprostim reprostim 805 Oct 15 08:49 Videos/2024/10/2024.10.15-08.49.44.705--2024.10.15-08.49.51.117.mkv.duct_usage.json
-rw-r--r-- 3 reprostim reprostim 1893 Oct 15 10:13 Videos/2024/10/2024.10.15-09.16.28.939--2024.10.15-10.13.16.646.mkv.duct_info.json
-rw-r--r-- 59 reprostim reprostim 47555 Oct 15 10:13 Videos/2024/10/2024.10.15-09.16.28.939--2024.10.15-10.13.16.646.mkv.duct_usage.json
-rw-r--r-- 3 reprostim reprostim 1841 Oct 15 14:02 Videos/2024/10/2024.10.15-14.02.27.765--2024.10.15-14.02.34.175.mkv.duct_info.json
-rw-r--r-- 3 reprostim reprostim 805 Oct 15 14:02 Videos/2024/10/2024.10.15-14.02.27.765--2024.10.15-14.02.34.175.mkv.duct_usage.json
-rw-r--r-- 2 reprostim reprostim 0 Oct 15 14:02 Videos/2024/10/2024.10.15-14.02.40.327--.mkv.duct_info.json
-rw-r--r-- 14 reprostim reprostim 10672 Oct 15 14:14 Videos/2024/10/2024.10.15-14.02.40.327--.mkv.duct_usage.json
```
[[!meta author=yoh]]
[[!tag projects/repronim]]

View file

@ -0,0 +1,68 @@
### Please describe the problem.
I have yet to grasp what "the best" or "legit" behavior is here, but I think it should be consistent between `git add` and `git annex add`. We observe that on an ACL powered filesystem, `git add` commits the file **without the executable bit set**, whereas `git-annex add`, even when adding to git, results in the **executable bit being set**.
Gory details
```
(datalad) [f006rq8@discovery-01 ds-perms]$ cat .gitattributes
* annex.backend=MD5E
**/.git* annex.largefiles=nothing
* annex.largefiles=((mimeencoding=binary)and(largerthan=0))
(datalad) [f006rq8@discovery-01 ds-perms]$ git --version
git version 2.39.2
(datalad) [f006rq8@discovery-01 ds-perms]$ git annex version --raw; echo
10.20240831+git21-gd717e9aca0-1~ndall+1
(datalad) [f006rq8@discovery-01 ds-perms]$ echo text > by-git-add
(datalad) [f006rq8@discovery-01 ds-perms]$ echo text > by-git-annex-add
(datalad) [f006rq8@discovery-01 ds-perms]$ git add by-git-add
(datalad) [f006rq8@discovery-01 ds-perms]$ git annex add by-git-annex-add
add by-git-annex-add (non-large file; adding content to git repository) ok
(recording state in git...)
(datalad) [f006rq8@discovery-01 ds-perms]$ git commit -m 'added two files, one with git-annex, one with git'
[master 1ed76d5] added two files, one with git-annex, one with git
2 files changed, 2 insertions(+)
create mode 100644 by-git-add
create mode 100755 by-git-annex-add
```
So the file added by git was committed with mode 100644, whereas the one added by git-annex was committed with mode 100755.
Both files appear "identical" at POSIX or ACL level of permissions:
```
(datalad) [f006rq8@discovery-01 ds-perms]$ nfs4_getfacl by-git-add by-git-annex-add
# file: by-git-add
A::OWNER@:rwadxtTnNcy
A:g:rc-DBIC-admin@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
A::d11124v@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
A::f002d6b@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
A::f00275v@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
A::d31548v@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
A::GROUP@:rwadxtTnNcy
A::d26427b@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
A:g:rc-DartFSadmin@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
# file: by-git-annex-add
A::OWNER@:rwadxtTnNcy
A:g:rc-DBIC-admin@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
A::d11124v@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
A::f002d6b@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
A::f00275v@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
A::d31548v@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
A::GROUP@:rwadxtTnNcy
A::d26427b@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
A:g:rc-DartFSadmin@KIEWIT.DARTMOUTH.EDU:rwadxtTnNcCoy
(datalad) [f006rq8@discovery-01 ds-perms]$ ls -l by-git-*
-rwxrwx---+ 1 f006rq8 rc-DBIC 5 Oct 16 15:05 by-git-add
-rwxrwx---+ 1 f006rq8 rc-DBIC 5 Oct 16 15:05 by-git-annex-add
```
[[!meta author=yoh]]
[[!tag projects/repronim]]

View file

@ -0,0 +1,57 @@
### Please describe the problem.
Somewhat not relevant details: We have freeze/thaw scripts configured at the level of a user
When operating on a POSIX-compliant filesystem (not an ACL nfs4 one), our freeze/thaw scripts fail -- they exit with a non-zero status:
```
[f006rq8@discovery-01 ds000003-qc]$ /dartfs/rc/lab/D/DBIC/DBIC/archive/bin-annex/thaw-content code
Operation to request attribute not supported: code
Operation to request attribute not supported: code
Failed while inserting ACE(s).
An error occurred during recursive file tree walk.
[f006rq8@discovery-01 ds000003-qc]$ echo $?
1
```
but git-annex seems to not care and proceeds forward
```
[f006rq8@discovery-01 ds000003-qc]$ git annex version
git-annex version: 10.20240831+git21-gd717e9aca0-1~ndall+1
[f006rq8@discovery-01 ds000003-qc]$ dd if=/dev/random of=binary count=10
0+10 records in
0+1 records out
366 bytes copied, 3.99037 s, 0.1 kB/s
[f006rq8@discovery-01 ds000003-qc]$ file binary
binary: data
[f006rq8@discovery-01 ds000003-qc]$ git annex add binary
add binary
Operation to request attribute not supported: binary
Operation to request attribute not supported: /scratch/repro-AXDLDiY/ds000003-qc/binary
Failed to instantiate ACL.
An error occurred during recursive file tree walk.
100% 366 B 582 KiB/s 0s
Operation to request attribute not supported: .git/annex/objects/mj/mj/MD5E-s366--023ff41f7d20c35b2d78c69aa3ce088b
Operation to request attribute not supported: .git/annex/objects/mj/mj/MD5E-s366--023ff41f7d20c35b2d78c69aa3ce088b
Failed while inserting ACE(s).
An error occurred during recursive file tree walk.
Operation to request attribute not supported: .git/annex/objects/mj/mj/MD5E-s366--023ff41f7d20c35b2d78c69aa3ce088b/MD5E-s366--023ff41f7d20c35b2d78c69aa3ce088b
Operation to request attribute not supported: /scratch/repro-AXDLDiY/ds000003-qc/.git/annex/objects/mj/mj/MD5E-s366--023ff41f7d20c35b2d78c69aa3ce088b/MD5E-s366--023ff41f7d20c35b2d78c69aa3ce088b
Failed to instantiate ACL.
An error occurred during recursive file tree walk.
Operation to request attribute not supported: .git/annex/objects/mj/mj/MD5E-s366--023ff41f7d20c35b2d78c69aa3ce088b
Operation to request attribute not supported: /scratch/repro-AXDLDiY/ds000003-qc/.git/annex/objects/mj/mj/MD5E-s366--023ff41f7d20c35b2d78c69aa3ce088b
add binary ok
(recording state in git...)
[f006rq8@discovery-01 ds000003-qc]$ echo $?
0
```
Even though this is something of a "feature" for us, since it lets the same freeze/thaw scripts be specified at the user level, I think that in general this is not the desired behavior -- if the underlying freeze/thaw script fails, git-annex should fail too!
[[!meta author=yoh]]
[[!tag projects/repronim]]

View file

@ -40,4 +40,4 @@ local repository version: 10
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
[[!tag projects/INM7]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="matrss"
avatar="http://cdn.libravatar.org/avatar/59541f50d845e5f81aff06e88a38b9de"
subject="comment 2"
date="2024-10-14T12:01:40Z"
content="""
I've solved my issue in forgejo-aneksajo by special-casing the lockcontent endpoint to return a 403 for unauthorized requests, instead of a 401 like all other POST endpoints (put, remove, etc.). I presume that this is also what `git annex p2phttp` itself does, is that right? Maybe the specific response expected by git-annex in case of an unauthorized request could be part of the protocol specification...
"""]]

View file

@ -0,0 +1,19 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2024-10-15T16:33:06Z"
content="""
It's not a special case about locking. p2phttp always uses 403 when
the mode it's serving does not allow the class of action.
Eg with --unauth-appendonly a remove request will cause a 403 response.
And with --unauth-readonly any non-read request does.
The docs say:
"When authentication is successful but does not allow a request to be
performed, it will fail with 403 Forbidden."
A 401 does make git-annex prompt for a password. p2phttp responds to that
when --authenv is used and the client didn't basic authenticate.
"""]]

View file

@ -0,0 +1,20 @@
[[!comment format=mdwn
username="matrss"
avatar="http://cdn.libravatar.org/avatar/59541f50d845e5f81aff06e88a38b9de"
subject="comment 4"
date="2024-10-16T12:09:02Z"
content="""
I see. Is a combination of `--unauth-*` and `--authenv` supposed to work?
I just tested it and if I serve a repository with `git annex p2phttp --unauth-readonly --authenv-http -J2 --port 54321` and try to do a `git annex drop --from origin` then it responds with a 403 and doesn't ask for credentials, even though there is a user configured that has write permissions and dropping works without the `--unauth-readonly`. Even if I previously authenticated and have the credentials in my keyring it still 403s, as git-annex seems to always first try the request without authentication.
This means the `--unauth-readonly` option currently isn't \"allow unauthenticated read-access\", but \"*only* allow unauthenticated read-access, deny all writes\".
I think wanting anonymous read and authenticated write access is quite common, so maybe this should be supported?
This kind of thing starts to work as soon as p2phttp responds with 401 for non-read requests, prompting git-annex to ask for credentials, but then you get the issue that a drop on the client-side will try to lock, gets a 401, and asks for credentials, instead of falling back to the read-only way of dropping (which is where lockcontent is special: it isn't strictly necessary for a drop to succeed, compared to the other endpoints which have nothing meaningful to fallback to).
This is why I assumed that lockcontent was handled specially already, and maybe it should be?
I think the way it is written in the design document doesn't support the current behavior. It says \"When authentication is successful but does not allow a request to be performed, it will fail with 403 Forbidden.\" but authentication hasn't even been attempted before returning a 403 with `--unauth-readonly`. Instead, it also says \"When a request needs authentication, it will fail with 401 Unauthorized.\", which would apply to this situation (under the assumption that `--unauth-readonly` doesn't mean \"no authentication possible at all\", which I had).
"""]]

View file

@ -0,0 +1,28 @@
[[!comment format=mdwn
username="joey"
subject="""comment 5"""
date="2024-10-17T13:36:56Z"
content="""
> I see. Is a combination of `--unauth-*` and `--authenv` supposed to work?
I didn't consider combining the two in the current implementation, so
behavior is essentially undefined. It happens to check for `--unauth-*`
before `--authenv` currently.
> I think wanting anonymous read and authenticated write access is quite common, so maybe this should be supported?
Agreed.
> the issue that a drop on the client-side will try to lock, gets a 401,
> and asks for credentials, instead of falling back to the read-only way of
> dropping
Well there are benefits to it actually locking rather than the fallback. It
allows dropping in more situations. So falling back on a 401 does not seem
like a good idea to me.
It might be that lockcontent should be allowed in a readonly connection.
The only possible issue is that would allow an anon to keep an object locked
indefinitely as some kind of DOS attack, so long as they were willing to
keep a connection open for keeplocked.
"""]]

View file

@ -0,0 +1,11 @@
[[!comment format=mdwn
username="joey"
subject="""comment 6"""
date="2024-10-17T15:07:12Z"
content="""
I've implemented combining --unauth-readonly (or --unauth-appendonly) with
--authenv/--authenv-http.
Read-only drop locking in that configuration still needs to be addressed, it
does prompt for authentication currently.
"""]]

View file

@ -0,0 +1,51 @@
### Please describe the problem.
Started to happen recently:
```
git grep 'ould not find module .System.Posix.IO'
cron-20241016/build-windows.yaml-1613-1929a4fc-failed/0_build-package.txt:2024-10-16T03:47:07.6376436Z ##[error] Could not find module `System.Posix.IO'.
cron-20241016/build-windows.yaml-1613-1929a4fc-failed/build-package/18_Build git-annex.txt:2024-10-16T03:47:07.6376375Z ##[error] Could not find module `System.Posix.IO'.
cron-20241017/build-windows.yaml-1614-1929a4fc-failed/0_build-package.txt:2024-10-17T03:45:41.3251590Z ##[error] Could not find module `System.Posix.IO'.
cron-20241017/build-windows.yaml-1614-1929a4fc-failed/build-package/18_Build git-annex.txt:2024-10-17T03:45:41.3251517Z ##[error] Could not find module `System.Posix.IO'.
cron-20241018/build-windows.yaml-1615-844a1bc5-failed/0_build-package.txt:2024-10-18T03:46:27.8014646Z ##[error] Could not find module `System.Posix.IO'.
cron-20241018/build-windows.yaml-1615-844a1bc5-failed/build-package/18_Build git-annex.txt:2024-10-18T03:46:27.8014601Z ##[error] Could not find module `System.Posix.IO'.
```
```
2024-10-16T03:47:07.6264270Z [ 91 of 739] Compiling Utility.OpenFile
2024-10-16T03:47:07.6264781Z
2024-10-16T03:47:07.6314519Z D:\a\git-annex\git-annex\Utility\OpenFile.hs:11:1: error: [GHC-87110]
2024-10-16T03:47:07.6376436Z ##[error] Could not find module `System.Posix.IO'.
2024-10-16T03:47:07.6378352Z [ 92 of 739] Compiling Utility.FileSize
2024-10-16T03:47:07.6379064Z Use -v to see a list of the files searched for.
2024-10-16T03:47:07.6379741Z [ 93 of 739] Compiling Utility.FileMode
2024-10-16T03:47:07.6380363Z |
2024-10-16T03:47:07.6380751Z [ 94 of 739] Compiling Git.FileMode
2024-10-16T03:47:07.6381306Z 11 | import System.Posix.IO
2024-10-16T03:47:07.6381845Z [ 95 of 739] Compiling Types.Transitions
2024-10-16T03:47:07.6382417Z | ^^^^^^^^^^^^^^^^^^^^^^
```
full logs are on smaug or for a while on [github actions](https://github.com/datalad/git-annex/actions/workflows/build-windows.yaml)
### What steps will reproduce the problem?
### What version of git-annex are you using? On what operating system?
### Please provide any additional information below.
[[!format sh """
# If you can, paste a complete transcript of the problem occurring here.
# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log
# End of transcript or log.
"""]]
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
> [[fixed|done]] --[[Joey]]

View file

@ -407,6 +407,15 @@ sending SUCCESS, leading to a perhaps long delay on the client before an
upload finishes. Perhaps extend the P2P protocol with progress information upload finishes. Perhaps extend the P2P protocol with progress information
for the uploads? for the uploads?
To stream uploads via the proxy, storeKey would need its interface changed
to not read the object file itself, but read from eg a lazy ByteString.
Chunking and encryption would complicate that. Chunking seems fairly
straightforward since it uses a lazy ByteString internally.
storeExport would change similarly. The external special remote protocol
would also need a change if it was to support that.
----
Both of those file-based approaches need the proxy to have enough free disk Both of those file-based approaches need the proxy to have enough free disk
space to buffer the largest file, times the number of concurrent space to buffer the largest file, times the number of concurrent
uploads+downloads. So the proxy will need to check annex.diskreserve uploads+downloads. So the proxy will need to check annex.diskreserve

View file

@ -0,0 +1,11 @@
We have received [a request](https://alioth-lists.debian.net/pipermail/neurodebian-users/2024-October/001278.html) to enable Servant for the NeuroDebian builds of git-annex. ATM by default it is not enabled
```
Flag Servant
Description: Use the servant library, enabling using annex+http urls and git-annex p2phttp
```
Before I patch it, I thought I would ask whether it is "safe" to enable, i.e. could it cause any side effects, and why is it not enabled by default? (This is unclear from the description.)
Thanks in advance for the clarification

View file

@ -0,0 +1,13 @@
[[!comment format=mdwn
username="joey"
subject="""comment 1"""
date="2024-10-17T14:07:02Z"
content="""
Yes, it's safe to enable this build flag. It makes the `git-annex p2phttp`
command work, but users still have to choose to run that server.
The only reason it's a build flag is that the version of servant needed is
not available in some old build environments. The flag actually should be
enabled by default when the necessary dependencies are installable
(in the case of a cabal build) or installed (in Debian's case).
"""]]

View file

@ -0,0 +1,28 @@
[[!comment format=mdwn
username="yarikoptic"
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
subject="comment 2"
date="2024-10-17T17:04:06Z"
content="""
you do have servant libraries in debian/control and they are present but the build we are getting is lacking Servant support:
```
pwd
/home/yoh/proj/datalad/ci/git-annex/builds/2024/10/cron-20241016/build-ubuntu.yaml-1678-1929a4fc-success/git-annex-debianstandalone-packages_10.20240927+git31-gc4dfeaef53_amd64
grep libghc-serv -r git-annex_10.20240927+git31-gc4dfeaef53-1~ndall+1_amd64.buildinfo
libghc-servant-client-core-dev (= 0.19-1+b4),
libghc-servant-client-dev (= 0.19-1+b4),
libghc-servant-dev (= 0.19-1+b4),
libghc-servant-server-dev (= 0.19.1-1+b4),
dpkg -x git-annex-standalone_10.20240927+git31-gc4dfeaef53-1\~ndall+1_amd64.deb X
X/usr/lib/git-annex.linux/git-annex version
git-annex version: 10.20240927+git31-gc4dfeaef53-1~ndall+1
build flags: Assistant Webapp Pairing Inotify DBus DesktopNotify TorrentParser MagicMime Benchmark Feeds Testsuite S3 WebDAV
dependency versions: aws-0.22.1 bloomfilter-2.0.1.0 cryptonite-0.29 DAV-1.3.4 feed-1.3.2.1 ghc-9.0.2 http-client-0.7.13.1 persistent-sqlite-2.13.1.0 torrent-10000.1.1 uuid-1.3.15 yesod-1.6.2.1
...
```
where is the catch, or did I misunderstand your statement?
"""]]

View file

@ -0,0 +1,12 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2024-10-17T17:28:20Z"
content="""
If you are building on debian stable the issue is probably the version of
stm bundled with ghc being too old. This build flag needs stm 2.5.1
(also warp 3.2.8 and warp-tls 3.2.2)
I've applied a patch that will let the build flag work with older versions
of stm (commit [[!commit 3a53c6012101dcb76786ee497ca4223a8e28ceab]]).
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="yarikoptic"
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
subject="comment 4"
date="2024-10-17T18:37:57Z"
content="""
Coolio, thank you Joey! I will check out tomorrow's builds.
"""]]

View file

@ -0,0 +1,22 @@
[[!comment format=mdwn
username="yarikoptic"
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
subject="comment 5"
date="2024-10-17T21:44:04Z"
content="""
tried, FTBFS
```
2024-10-17T20:15:49.1836077Z
2024-10-17T20:15:49.1836378Z P2P/Http/State.hs:476:49: error:
2024-10-17T20:15:49.1847864Z * Variable not in scope: writeTMVar :: TMVar Bool -> Bool -> STM a0
2024-10-17T20:15:49.1848889Z * Perhaps you meant one of these:
2024-10-17T20:15:49.1849789Z `writeTVar' (imported from Control.Concurrent.STM),
2024-10-17T20:15:49.1850800Z `writeTChan' (imported from Control.Concurrent.STM)
2024-10-17T20:15:49.1851624Z |
2024-10-17T20:15:49.1852313Z 476 | writeTMVar lv False
2024-10-17T20:15:49.1853246Z | ^^^^^^^^^^
```
it is building within `docker://datalad/buildenv-git-annex` which is built using bookworm frozen to 20230829T151102Z via snapshots repo.
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="yarikoptic"
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
subject="comment 7"
date="2024-10-17T21:44:50Z"
content="""
[full log on github](https://github.com/datalad/git-annex/actions/runs/11392272703/job/31697961630?pr=208)
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="joey"
subject="""comment 7"""
date="2024-10-18T00:56:14Z"
content="""
Ah, missed that call site. Fixed in
[[!commit b83fdf66dfe4caf52c097af8773d8f7470e48b5e]]
"""]]

View file

@ -0,0 +1,17 @@
[[!comment format=mdwn
username="yarikoptic"
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
subject="comment 8"
date="2024-10-18T13:01:25Z"
content="""
Great! Confirming that we got it!
```
pwd
/home/yoh/proj/datalad/ci/git-annex/builds/2024/10/cron-20241018/build-ubuntu.yaml-1683-844a1bc5-success
git grep -h 'build flags: .*Servant'
2024-10-18T03:11:57.0121660Z build flags: Assistant Webapp Pairing Inotify DBus DesktopNotify TorrentParser MagicMime Servant Benchmark Feeds Testsuite S3 WebDAV
```
I will add a \"test\" to ensure we \"do not lose it\" from our builds ;-)
"""]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="yarikoptic"
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
subject="comment 9"
date="2024-10-18T13:10:37Z"
content="""
should we expect Servant to be built/enabled for Windows builds as well? (they [FTBFS ATM](https://git-annex.branchable.com/bugs/windows_FTBFS__58___Could_not_find_module_System.Posix/?updated) so do not get to \"build flags:\" to check)
"""]]

View file

@ -116,11 +116,17 @@ convenient way to download the content of any key, by using the path
Allows unauthenticated users to read the repository, but not make
modifications to it.
This can be combined with `--authenv` or `--authenv-http` to allow
anonymous readonly access, and authenticated write access.
* `--unauth-appendonly`
Allows unauthenticated users to read the repository, and store data in
it, but not remove data from it.
This can be combined with `--authenv` or `--authenv-http` to allow
anonymous appendonly access, and authenticated remove access.
* `--wideopen`
Gives unauthenticated users full read+write+remove access to the

32
doc/projects/INM7.mdwn Normal file
View file

@ -0,0 +1,32 @@
The INM7 data hosting infrastructure uses git-annex. This is a tracking
page for issues relating to that project. It includes issues relating to
[forgejo-aneksajo](https://codeberg.org/matrss/forgejo-aneksajo).
TODOs
=====
[[!inline pages="todo/* and !todo/done and !link(todo/done) and
tagged(projects/INM7)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]]
<details>
<summary>Done</summary>
[[!inline pages="todo/* and !todo/done and link(todo/done) and
tagged(projects/INM7)" feeds=no actions=yes archive=yes show=0 template=buglist]]
</details>
Bugs
====
[[!inline pages="bugs/* and !bugs/done and !link(bugs/done) and
tagged(projects/INM7)" sort=mtime feeds=no actions=yes archive=yes show=0 template=buglist]]
<details>
<summary>Fixed</summary>
[[!inline pages="(bugs/* and !bugs/done and link(bugs/done)) and
tagged(projects/INM7)" feeds=no actions=yes archive=yes show=0 template=buglist]]
</details>

View file

@ -12,3 +12,5 @@ There is the gcrypt special remote (and it worked with the forgejo instance I tr
The advantage of having the annexed files but not the git repo encrypted is that the file tree, commit history, readme and all the things typically displayed by the site would still be viewable (communicating repository layout, contents), but GPG keys would be used to control practical access (possibly on top of site's access permissions).
Thanks in advance for considering! -- MSz
[[!tag projects/INM7]]

View file

@ -28,7 +28,11 @@ Planned schedule of work:
## work notes
* Currently working on streaming download via proxy from special remote. * Currently working on streaming special remotes via proxy
in the `streamproxy` branch.
* Downloads from special remotes can stream (though using a temp file on
the proxy). Next: Streaming uploads via the proxy.
## completed items for October's work on streaming through proxy to special remotes

View file

@ -0,0 +1,5 @@
`git annex import` seems to unconditionally ignore all kinds of symbolic links in the source importtree remote. We have some "legacy" datasets that I would like to wholesale import into git-annex repositories, and they use symbolic links pointing at other files within the same directory to avoid some duplication.
Is there an option to make `git annex import` not ignore those symlinks, that I have overlooked? If not then this is a request to have such an option.
I think it could either resolve the symlink, check that the target is within the to-be-imported directory, and import it just like a regular file if it is, or it could import the symlink as-is with no modifications.

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="matrss"
avatar="http://cdn.libravatar.org/avatar/59541f50d845e5f81aff06e88a38b9de"
subject="comment 1"
date="2024-10-14T12:21:54Z"
content="""
It seems like I can make git-annex discover a newly added `annex.url` setting by unsetting `remote.<name>.annex-uuid` and then doing `git annex init` again or trying to copy something to the remote.
"""]]

View file

@ -0,0 +1,27 @@
If I try to specify custom scripts for freeze/thaw in `.git/config` of a repository with relative paths (since absolute paths are not robust to renames etc and thus IMHO should be avoided):
```
(datalad) [f006rq8@discovery-01 subdir]$ pwd
/dartfs/rc/lab/D/DBIC/DBIC/CON/asmacdo/tmp/test-local-thaw/subdir
(datalad) [f006rq8@discovery-01 subdir]$ git config get annex.thawcontent-command
.git/annex/thaw-content %path
(datalad) [f006rq8@discovery-01 subdir]$ git config get annex.freezecontent-command
.git/annex/freeze-content %path
```
their invocation fails when run from a subdirectory
```
[2024-10-16 14:47:08.941720897] (Annex.Perms) freezing content ../.git/annex/objects/6k/VJ/MD5E-s115--9a295e3f5f148380d74c3ff3ebdaa173/MD5E-s115--9a295e3f5f148380d74c3ff3ebdaa173
[2024-10-16 14:47:08.948171243] (Utility.Process) process [2572997] call: sh ["-c",".git/annex/freeze-content '../.git/annex/objects/6k/VJ/MD5E-s115--9a295e3f5f148380d74c3ff3ebdaa173/MD5E-s1
15--9a295e3f5f148380d74c3ff3ebdaa173'"]
sh: .git/annex/freeze-content: No such file or directory
```
I wonder if a way could be added to specify them relative to the top of the repository.
[[!meta author=yoh]]
[[!tag projects/repronim]]

View file

@ -0,0 +1,8 @@
[[!comment format=mdwn
username="yarikoptic"
avatar="http://cdn.libravatar.org/avatar/f11e9c84cb18d26a1748c33b48c924b4"
subject="comment 1"
date="2024-10-16T18:58:04Z"
content="""
as an alternative/complementary idea -- could git-annex support simply having those scripts under `.git/hooks`, e.g. `.git/hooks/annex-{freeze,thaw}-content`?
"""]]

View file

@ -321,8 +321,7 @@ Executable git-annex
servant-client, servant-client,
servant-client-core, servant-client-core,
warp (>= 3.2.8), warp (>= 3.2.8),
warp-tls (>= 3.2.2), warp-tls (>= 3.2.2)
stm (>= 2.5.1)
CPP-Options: -DWITH_SERVANT CPP-Options: -DWITH_SERVANT
Other-Modules: Other-Modules:
Command.P2PHttp Command.P2PHttp
@ -1120,6 +1119,7 @@ Executable git-annex
Utility.SshConfig Utility.SshConfig
Utility.SshHost Utility.SshHost
Utility.StatelessOpenPGP Utility.StatelessOpenPGP
Utility.STM
Utility.Su Utility.Su
Utility.SystemDirectory Utility.SystemDirectory
Utility.Terminal Utility.Terminal