added retrieveKeyFileInOrder and ORDERED to external special remote protocol

I anticipate lots of external special remote programs will neglect
implementing this. Still, it's the right thing to do to assume that some
of them may write files out of order. Probably most external special
remotes will not be used with a proxy. When someone is using one with a
proxy, they can always get it fixed to send ORDERED.
This commit is contained in:
Joey Hess 2024-10-15 15:35:09 -04:00
parent f920d90781
commit d9b4bf4224
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
28 changed files with 80 additions and 13 deletions

View file

@ -248,12 +248,13 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go
writeVerifyChunk iv h b
storetofile iv h (n - fromIntegral (B.length b)) bs
proxyget offset af k = withproxytmpfile k $ \tmpfile ->
proxyget offset af k = withproxytmpfile k $ \tmpfile -> do
let retrieve = tryNonAsync $ Remote.retrieveKeyFile r k af
(fromRawFilePath tmpfile) nullMeterUpdate vc
in case fromKey keySize k of
ordered <- Remote.retrieveKeyFileInOrder r
case fromKey keySize k of
#ifndef mingw32_HOST_OS
Just size | size > 0 -> do
Just size | size > 0 && ordered -> do
cancelv <- liftIO newEmptyMVar
donev <- liftIO newEmptyMVar
streamer <- liftIO $ async $

View file

@ -1,6 +1,9 @@
git-annex (10.20240928) UNRELEASED; urgency=medium
* Sped up proxied downloads from special remotes, by streaming.
* Added GETORDERED request to external special remote protocol.
When an external special remote responds with ORDERED, downloads from
it can be streamed through a proxy.
-- Joey Hess <id@joeyh.name> Tue, 15 Oct 2024 12:12:18 -0400

View file

@ -81,6 +81,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = removeKeyDummy

View file

@ -69,6 +69,9 @@ gen r _ rc gc rs = do
, name = Git.repoDescribe r
, storeKey = uploadKey
, retrieveKeyFile = downloadKey
-- Bittorrent downloads out of order, but downloadTorrentContent
-- moves the downloaded file to the destination at the end.
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
-- Bittorrent does its own hash checks.
, retrievalSecurityPolicy = RetrievalAllKeysSecure

View file

@ -86,6 +86,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
-- Borg cryptographically verifies content.
, retrievalSecurityPolicy = RetrievalAllKeysSecure

View file

@ -78,6 +78,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
-- Bup uses git, which cryptographically verifies content
-- (with SHA1, but sufficiently for this).

View file

@ -79,6 +79,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
-- ddar communicates over ssh, not subject to http redirect
-- type attacks

View file

@ -98,6 +98,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = retrieveKeyFileCheapM dir chunkconfig
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = removeKeyDummy

View file

@ -68,7 +68,7 @@ gen rt externalprogram r u rc gc rs
| externalprogram' == ExternalType "readonly" = do
c <- parsedRemoteConfig remote rc
cst <- remoteCost gc c expensiveRemoteCost
let rmt = mk c cst (pure GloballyAvailable)
let rmt = mk c cst (pure True) (pure GloballyAvailable)
Nothing
(externalInfo externalprogram')
Nothing
@ -105,7 +105,9 @@ gen rt externalprogram r u rc gc rs
let cheapexportsupported = if exportsupported
then exportIsSupported
else exportUnsupported
let rmt = mk c cst (getAvailability external)
let rmt = mk c cst
(getOrdered external)
(getAvailability external)
(Just (whereisKeyM external))
(getInfoM external)
(Just (claimUrlM external))
@ -119,13 +121,14 @@ gen rt externalprogram r u rc gc rs
(checkPresentM external)
rmt
where
mk c cst avail towhereis togetinfo toclaimurl tocheckurl exportactions cheapexportsupported =
mk c cst ordered avail towhereis togetinfo toclaimurl tocheckurl exportactions cheapexportsupported =
Remote
{ uuid = u
, cost = cst
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = ordered
, retrieveKeyFileCheap = Nothing
-- External special remotes use many http libraries
-- and have no protection against redirects to
@ -801,6 +804,14 @@ getAvailability external = catchNonAsync query (const (pure defavail))
_ -> Nothing
defavail = GloballyAvailable
{- Asks an external special remote, by sending it GETORDERED, whether it
 - writes files in order when retrieving them.
 -
 - The result is True only when the remote replies ORDERED. An UNORDERED
 - reply, any other reply, and any exception handled by catchNonAsync
 - while making the request all yield False, so a remote is only treated
 - as writing in order when it has explicitly said so. -}
getOrdered :: External -> Annex Bool
getOrdered external = catchNonAsync query (const (pure False))
where
query = handleRequest external GETORDERED Nothing $ \req -> case req of
ORDERED -> result True
UNORDERED -> result False
-- Any other reply is treated the same as UNORDERED. Presumably this
-- includes UNSUPPORTED-REQUEST from remotes that do not implement
-- GETORDERED -- confirm against handleRequest.
_ -> result False
claimUrlM :: External -> URLString -> Annex Bool
claimUrlM external url =
handleRequest external (CLAIMURL url) Nothing $ \req -> case req of

View file

@ -168,6 +168,7 @@ data Request
| INITREMOTE
| GETCOST
| GETAVAILABILITY
| GETORDERED
| CLAIMURL URLString
| CHECKURL URLString
| TRANSFER Direction SafeKey FilePath
@ -200,6 +201,7 @@ instance Proto.Sendable Request where
formatMessage INITREMOTE = ["INITREMOTE"]
formatMessage GETCOST = ["GETCOST"]
formatMessage GETAVAILABILITY = ["GETAVAILABILITY"]
formatMessage GETORDERED = ["GETORDERED"]
formatMessage (CLAIMURL url) = [ "CLAIMURL", Proto.serialize url ]
formatMessage (CHECKURL url) = [ "CHECKURL", Proto.serialize url ]
formatMessage (TRANSFER direction key file) =
@ -248,6 +250,8 @@ data Response
| REMOVE_FAILURE Key ErrorMsg
| COST Cost
| AVAILABILITY Availability
| ORDERED
| UNORDERED
| INITREMOTE_SUCCESS
| INITREMOTE_FAILURE ErrorMsg
| CLAIMURL_SUCCESS
@ -284,6 +288,8 @@ instance Proto.Receivable Response where
parseCommand "REMOVE-FAILURE" = Proto.parse2 REMOVE_FAILURE
parseCommand "COST" = Proto.parse1 COST
parseCommand "AVAILABILITY" = Proto.parse1 AVAILABILITY
parseCommand "ORDERED" = Proto.parse0 ORDERED
parseCommand "UNORDERED" = Proto.parse0 UNORDERED
parseCommand "INITREMOTE-SUCCESS" = Proto.parse0 INITREMOTE_SUCCESS
parseCommand "INITREMOTE-FAILURE" = Proto.parse1 INITREMOTE_FAILURE
parseCommand "CLAIMURL-SUCCESS" = Proto.parse0 CLAIMURL_SUCCESS

View file

@ -140,6 +140,7 @@ gen' r u c gc rs = do
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = removeKeyDummy

View file

@ -210,6 +210,7 @@ gen r u rc gc rs
, name = Git.repoDescribe r
, storeKey = copyToRemote new st
, retrieveKeyFile = copyFromRemote new st
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = copyFromRemoteCheap st r
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = dropKey new st

View file

@ -105,6 +105,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
-- content stored on git-lfs is hashed with SHA256
-- no matter what git-annex key it's for, and the hash

View file

@ -81,6 +81,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
-- glacier-cli does not follow redirects and does
-- not support file://, as far as we know, but

View file

@ -62,6 +62,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure False
, retrieveKeyFileCheap = Nothing
-- A hook could use http and be vulnerable to
-- redirect to file:// attacks, etc.

View file

@ -67,6 +67,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = cannotModify
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
-- HttpManagerRestricted is used here, so this is
-- secure.

View file

@ -59,6 +59,7 @@ chainGen addr r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = store u gc protorunner
, retrieveKeyFile = retrieve gc protorunner
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = remove u protorunner

View file

@ -94,6 +94,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Just (retrieveCheap o)
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = removeKeyDummy

View file

@ -209,6 +209,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
-- HttpManagerRestricted is used here, so this is
-- secure.

View file

@ -89,6 +89,9 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = store rs hdl
, retrieveKeyFile = retrieve rs hdl
-- Unsure about whether tahoe might sometimes write chunks
-- out of order.
, retrieveKeyFileInOrder = pure False
, retrieveKeyFileCheap = Nothing
-- Tahoe cryptographically verifies content.
, retrievalSecurityPolicy = RetrievalAllKeysSecure

View file

@ -77,6 +77,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = uploadKey
, retrieveKeyFile = downloadKey urlincludeexclude
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
-- HttpManagerRestricted is used here, so this is
-- secure.

View file

@ -88,6 +88,7 @@ gen r u rc gc rs = do
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
-- HttpManagerRestricted is used here, so this is
-- secure.

View file

@ -98,6 +98,8 @@ data RemoteA a = Remote
-- sequentially to the file.)
-- Throws exception on failure.
, retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfigA a -> a Verification
{- Will retrieveKeyFile write to the file in order? -}
, retrieveKeyFileInOrder :: a Bool
-- Retrieves a key's contents to a tmp file, if it can be done cheaply.
-- It's ok to create a symlink or hardlink.
-- Throws exception on failure.

View file

@ -55,8 +55,8 @@ any extensions it wants to use.
Next, git-annex will generally send a message telling the special
remote to start up. (Or it might send an INITREMOTE or EXPORTSUPPORTED or
LISTCONFIGS, or perhaps other things in the future, so don't hardcode this
order.)
LISTCONFIGS, or perhaps other things in the future, so don't
hardcode this order.)
PREPARE
@ -118,7 +118,7 @@ The following requests *must* all be supported by the special remote.
* `PREPARE`
Tells the remote that it's time to prepare itself to be used.
Only a few requests for details about the remote can come before this
(EXTENSIONS, INITREMOTE, EXPORTSUPPORTED, and LISTCONFIGS,
(EXTENSIONS, INITREMOTE, EXPORTSUPPORTED and LISTCONFIGS,
but others may be added later).
* `PREPARE-SUCCESS`
Sent as a response to PREPARE once the special remote is ready for use.
@ -203,6 +203,15 @@ the special remote can reply with `UNSUPPORTED-REQUEST`.
(See Config/Cost.hs for some standard costs.)
* `COST Int`
Indicates the cost of the remote.
* `GETORDERED`
Asks the remote if it will always write files in order when performing a
`TRANSFER RETRIEVE`. Writing in order lets a proxy stream content from
the remote. When this is not implemented, git-annex assumes the
remote may write parts of the file out of order.
* `ORDERED`
Indicates that files are written in order.
* `UNORDERED`
Indicates that files are not written in order.
* `GETAVAILABILITY`
Asks the remote if it is locally or globally available.
(Ie stored in the cloud vs on a local disk.)
@ -221,6 +230,8 @@ the special remote can reply with `UNSUPPORTED-REQUEST`.
trying to use the remote.
Older versions of git-annex do not support this response, so avoid
sending it unless the `UNAVAILABLERESPONSE` extension is enabled.
* `ORDERED`
* `CLAIMURL Url`
Asks the remote if it wishes to claim responsibility for downloading
an url.

View file

@ -364,10 +364,6 @@ remote to the usual temp object file on the proxy, but without moving that
to the annex object file at the end. As the temp object file grows, stream
the content out via the proxy.
> This needs the same process to read and write the same file, which is
> disallowed in Haskell (without going lowlevel in a way that seems
> difficult).
Some special remotes will overwrite or truncate an existing temp object
file when starting a download. So the proxy should wait until the file is
growing to start streaming it.

View file

@ -36,6 +36,9 @@ When that branch is pushed to the proxy, it will update the tree exported
to the special remote. When files are copied to the remote via the proxy,
it will also update the exported tree.
Note that proxying for a special remote can use disk space in the
repository to temporarily hold the content of annexed files.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.

View file

@ -222,6 +222,12 @@ while read line; do
;;
esac
;;
GETORDERED)
# This remote writes to files in order when
# retrieving them. If it didn't, it
# would be important to respond with UNORDERED.
echo ORDERED
;;
CHECKPRESENT)
key="$2"
calclocation "$key"

View file

@ -30,6 +30,12 @@ Planned schedule of work:
* Currently working on streaming download via proxy from special remote.
## completed items for October's work on streaming through proxy to special remotes
* Stream downloads through proxy for all special remotes that indicate
they download in order.
* Added ORDERED message to external special remote protocol.
## completed items for September's work on proving behavior of preferred content
* Static analysis to detect "not present", "not balanced", and similar