added retrieveKeyFileInOrder and ORDERED to external special remote protocol
I anticipate lots of external special remote programs will neglect implementing this. Still, it's the right thing to do to assume that some of them may write files out of order. Probably most external special remotes will not be used with a proxy. When someone is using one with a proxy, they can always get it fixed to send ORDERED.
This commit is contained in:
parent
f920d90781
commit
d9b4bf4224
28 changed files with 80 additions and 13 deletions
|
@ -248,12 +248,13 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go
|
|||
writeVerifyChunk iv h b
|
||||
storetofile iv h (n - fromIntegral (B.length b)) bs
|
||||
|
||||
proxyget offset af k = withproxytmpfile k $ \tmpfile ->
|
||||
proxyget offset af k = withproxytmpfile k $ \tmpfile -> do
|
||||
let retrieve = tryNonAsync $ Remote.retrieveKeyFile r k af
|
||||
(fromRawFilePath tmpfile) nullMeterUpdate vc
|
||||
in case fromKey keySize k of
|
||||
ordered <- Remote.retrieveKeyFileInOrder r
|
||||
case fromKey keySize k of
|
||||
#ifndef mingw32_HOST_OS
|
||||
Just size | size > 0 -> do
|
||||
Just size | size > 0 && ordered -> do
|
||||
cancelv <- liftIO newEmptyMVar
|
||||
donev <- liftIO newEmptyMVar
|
||||
streamer <- liftIO $ async $
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
git-annex (10.20240928) UNRELEASED; urgency=medium
|
||||
|
||||
* Sped up proxied downloads from special remotes, by streaming.
|
||||
* Added GETORDERED request to external special remote protocol.
|
||||
When the external special remote responds with ORDERED, it can stream
|
||||
through a proxy.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 15 Oct 2024 12:12:18 -0400
|
||||
|
||||
|
|
|
@ -81,6 +81,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
||||
, removeKey = removeKeyDummy
|
||||
|
|
|
@ -69,6 +69,9 @@ gen r _ rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = uploadKey
|
||||
, retrieveKeyFile = downloadKey
|
||||
-- Bittorrent downloads out of order, but downloadTorrentContent
|
||||
-- moves the downloaded file to the destination at the end.
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- Bittorrent does its own hash checks.
|
||||
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
||||
|
|
|
@ -86,6 +86,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- Borg cryptographically verifies content.
|
||||
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
||||
|
|
|
@ -78,6 +78,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- Bup uses git, which cryptographically verifies content
|
||||
-- (with SHA1, but sufficiently for this).
|
||||
|
|
|
@ -79,6 +79,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- ddar communicates over ssh, not subject to http redirect
|
||||
-- type attacks
|
||||
|
|
|
@ -98,6 +98,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = retrieveKeyFileCheapM dir chunkconfig
|
||||
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
||||
, removeKey = removeKeyDummy
|
||||
|
|
|
@ -68,7 +68,7 @@ gen rt externalprogram r u rc gc rs
|
|||
| externalprogram' == ExternalType "readonly" = do
|
||||
c <- parsedRemoteConfig remote rc
|
||||
cst <- remoteCost gc c expensiveRemoteCost
|
||||
let rmt = mk c cst (pure GloballyAvailable)
|
||||
let rmt = mk c cst (pure True) (pure GloballyAvailable)
|
||||
Nothing
|
||||
(externalInfo externalprogram')
|
||||
Nothing
|
||||
|
@ -105,7 +105,9 @@ gen rt externalprogram r u rc gc rs
|
|||
let cheapexportsupported = if exportsupported
|
||||
then exportIsSupported
|
||||
else exportUnsupported
|
||||
let rmt = mk c cst (getAvailability external)
|
||||
let rmt = mk c cst
|
||||
(getOrdered external)
|
||||
(getAvailability external)
|
||||
(Just (whereisKeyM external))
|
||||
(getInfoM external)
|
||||
(Just (claimUrlM external))
|
||||
|
@ -119,13 +121,14 @@ gen rt externalprogram r u rc gc rs
|
|||
(checkPresentM external)
|
||||
rmt
|
||||
where
|
||||
mk c cst avail towhereis togetinfo toclaimurl tocheckurl exportactions cheapexportsupported =
|
||||
mk c cst ordered avail towhereis togetinfo toclaimurl tocheckurl exportactions cheapexportsupported =
|
||||
Remote
|
||||
{ uuid = u
|
||||
, cost = cst
|
||||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = ordered
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- External special remotes use many http libraries
|
||||
-- and have no protection against redirects to
|
||||
|
@ -801,6 +804,14 @@ getAvailability external = catchNonAsync query (const (pure defavail))
|
|||
_ -> Nothing
|
||||
defavail = GloballyAvailable
|
||||
|
||||
-- Sends GETORDERED to the external special remote and reports whether it
-- answered ORDERED. UNORDERED or any other response counts as not ordered,
-- and if the query throws, the catchNonAsync fallback yields False. So a
-- remote that does not implement the request is treated as one that may
-- write retrieved files out of order.
getOrdered :: External -> Annex Bool
|
||||
getOrdered external = catchNonAsync query (const (pure False))
|
||||
where
|
||||
query = handleRequest external GETORDERED Nothing $ \req -> case req of
|
||||
ORDERED -> result True
|
||||
UNORDERED -> result False
|
||||
-- Any other response is conservatively treated as unordered.
_ -> result False
|
||||
|
||||
claimUrlM :: External -> URLString -> Annex Bool
|
||||
claimUrlM external url =
|
||||
handleRequest external (CLAIMURL url) Nothing $ \req -> case req of
|
||||
|
|
6
Remote/External/Types.hs
vendored
6
Remote/External/Types.hs
vendored
|
@ -168,6 +168,7 @@ data Request
|
|||
| INITREMOTE
|
||||
| GETCOST
|
||||
| GETAVAILABILITY
|
||||
| GETORDERED
|
||||
| CLAIMURL URLString
|
||||
| CHECKURL URLString
|
||||
| TRANSFER Direction SafeKey FilePath
|
||||
|
@ -200,6 +201,7 @@ instance Proto.Sendable Request where
|
|||
formatMessage INITREMOTE = ["INITREMOTE"]
|
||||
formatMessage GETCOST = ["GETCOST"]
|
||||
formatMessage GETAVAILABILITY = ["GETAVAILABILITY"]
|
||||
formatMessage GETORDERED = ["GETORDERED"]
|
||||
formatMessage (CLAIMURL url) = [ "CLAIMURL", Proto.serialize url ]
|
||||
formatMessage (CHECKURL url) = [ "CHECKURL", Proto.serialize url ]
|
||||
formatMessage (TRANSFER direction key file) =
|
||||
|
@ -248,6 +250,8 @@ data Response
|
|||
| REMOVE_FAILURE Key ErrorMsg
|
||||
| COST Cost
|
||||
| AVAILABILITY Availability
|
||||
| ORDERED
|
||||
| UNORDERED
|
||||
| INITREMOTE_SUCCESS
|
||||
| INITREMOTE_FAILURE ErrorMsg
|
||||
| CLAIMURL_SUCCESS
|
||||
|
@ -284,6 +288,8 @@ instance Proto.Receivable Response where
|
|||
parseCommand "REMOVE-FAILURE" = Proto.parse2 REMOVE_FAILURE
|
||||
parseCommand "COST" = Proto.parse1 COST
|
||||
parseCommand "AVAILABILITY" = Proto.parse1 AVAILABILITY
|
||||
parseCommand "ORDERED" = Proto.parse0 ORDERED
|
||||
parseCommand "UNORDERED" = Proto.parse0 UNORDERED
|
||||
parseCommand "INITREMOTE-SUCCESS" = Proto.parse0 INITREMOTE_SUCCESS
|
||||
parseCommand "INITREMOTE-FAILURE" = Proto.parse1 INITREMOTE_FAILURE
|
||||
parseCommand "CLAIMURL-SUCCESS" = Proto.parse0 CLAIMURL_SUCCESS
|
||||
|
|
|
@ -140,6 +140,7 @@ gen' r u c gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
||||
, removeKey = removeKeyDummy
|
||||
|
|
|
@ -210,6 +210,7 @@ gen r u rc gc rs
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = copyToRemote new st
|
||||
, retrieveKeyFile = copyFromRemote new st
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = copyFromRemoteCheap st r
|
||||
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
||||
, removeKey = dropKey new st
|
||||
|
|
|
@ -105,6 +105,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- content stored on git-lfs is hashed with SHA256
|
||||
-- no matter what git-annex key it's for, and the hash
|
||||
|
|
|
@ -81,6 +81,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- glacier-cli does not follow redirects and does
|
||||
-- not support file://, as far as we know, but
|
||||
|
|
|
@ -62,6 +62,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure False
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- A hook could use http and be vulnerable to
|
||||
-- redirect to file:// attacks, etc.
|
||||
|
|
|
@ -67,6 +67,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = cannotModify
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- HttpManagerRestricted is used here, so this is
|
||||
-- secure.
|
||||
|
|
|
@ -59,6 +59,7 @@ chainGen addr r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = store u gc protorunner
|
||||
, retrieveKeyFile = retrieve gc protorunner
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
||||
, removeKey = remove u protorunner
|
||||
|
|
|
@ -94,6 +94,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Just (retrieveCheap o)
|
||||
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
||||
, removeKey = removeKeyDummy
|
||||
|
|
|
@ -209,6 +209,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- HttpManagerRestricted is used here, so this is
|
||||
-- secure.
|
||||
|
|
|
@ -89,6 +89,9 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = store rs hdl
|
||||
, retrieveKeyFile = retrieve rs hdl
|
||||
-- Unsure about whether tahoe might sometimes write chunks
|
||||
-- out of order.
|
||||
, retrieveKeyFileInOrder = pure False
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- Tahoe cryptographically verifies content.
|
||||
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
||||
|
|
|
@ -77,6 +77,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = uploadKey
|
||||
, retrieveKeyFile = downloadKey urlincludeexclude
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- HttpManagerRestricted is used here, so this is
|
||||
-- secure.
|
||||
|
|
|
@ -88,6 +88,7 @@ gen r u rc gc rs = do
|
|||
, name = Git.repoDescribe r
|
||||
, storeKey = storeKeyDummy
|
||||
, retrieveKeyFile = retrieveKeyFileDummy
|
||||
, retrieveKeyFileInOrder = pure True
|
||||
, retrieveKeyFileCheap = Nothing
|
||||
-- HttpManagerRestricted is used here, so this is
|
||||
-- secure.
|
||||
|
|
|
@ -98,6 +98,8 @@ data RemoteA a = Remote
|
|||
-- sequentially to the file.)
|
||||
-- Throws exception on failure.
|
||||
, retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfigA a -> a Verification
|
||||
-- Will retrieveKeyFile write to the file in order, from start to end?
-- (Writing in order lets a proxy stream the content while it is retrieved.)
|
||||
, retrieveKeyFileInOrder :: a Bool
|
||||
-- Retrieves a key's contents to a tmp file, if it can be done cheaply.
|
||||
-- It's ok to create a symlink or hardlink.
|
||||
-- Throws exception on failure.
|
||||
|
|
|
@ -55,8 +55,8 @@ any extensions it wants to use.
|
|||
|
||||
Next, git-annex will generally send a message telling the special
|
||||
remote to start up. (Or it might send an INITREMOTE or EXPORTSUPPORTED or
|
||||
LISTCONFIGS, or perhaps other things in the future, so don't hardcode this
|
||||
order.)
|
||||
LISTCONFIGS, or perhaps other things in the future, so don't
|
||||
hardcode this order.)
|
||||
|
||||
PREPARE
|
||||
|
||||
|
@ -118,7 +118,7 @@ The following requests *must* all be supported by the special remote.
|
|||
* `PREPARE`
|
||||
Tells the remote that it's time to prepare itself to be used.
|
||||
Only a few requests for details about the remote can come before this
|
||||
(EXTENSIONS, INITREMOTE, EXPORTSUPPORTED, and LISTCONFIGS,
|
||||
(EXTENSIONS, INITREMOTE, EXPORTSUPPORTED and LISTCONFIGS,
|
||||
but others may be added later).
|
||||
* `PREPARE-SUCCESS`
|
||||
Sent as a response to PREPARE once the special remote is ready for use.
|
||||
|
@ -203,6 +203,15 @@ the special remote can reply with `UNSUPPORTED-REQUEST`.
|
|||
(See Config/Cost.hs for some standard costs.)
|
||||
* `COST Int`
|
||||
Indicates the cost of the remote.
|
||||
* `GETORDERED`
|
||||
Asks the remote if it will always write files in order when performing a
|
||||
`TRANSFER RETRIEVE`. Writing in order lets a proxy stream content from
|
||||
the remote. When this is not implemented, git-annex assumes the
|
||||
remote may write parts of the file out of order.
|
||||
* `ORDERED`
|
||||
Indicates that files are written in order.
|
||||
* `UNORDERED`
|
||||
Indicates that files are not written in order.
|
||||
* `GETAVAILABILITY`
|
||||
Asks the remote if it is locally or globally available.
|
||||
(Ie stored in the cloud vs on a local disk.)
|
||||
|
@ -221,6 +230,8 @@ the special remote can reply with `UNSUPPORTED-REQUEST`.
|
|||
trying to use the remote.
|
||||
Older versions of git-annex do not support this response, so avoid
|
||||
sending it unless the `UNAVAILABLERESPONSE` extension is enabled.
|
||||
|
||||
|
||||
* `CLAIMURL Url`
|
||||
Asks the remote if it wishes to claim responsibility for downloading
|
||||
an url.
|
||||
|
|
|
@ -364,10 +364,6 @@ remote to the usual temp object file on the proxy, but without moving that
|
|||
to the annex object file at the end. As the temp object file grows, stream
|
||||
the content out via the proxy.
|
||||
|
||||
> This needs the same process to read and write the same file, which is
|
||||
> disallowed in Haskell (without going lowlevel in a way that seems
|
||||
> difficult).
|
||||
|
||||
Some special remotes will overwrite or truncate an existing temp object
|
||||
file when starting a download. So the proxy should wait until the file is
|
||||
growing to start streaming it.
|
||||
|
|
|
@ -36,6 +36,9 @@ When that branch is pushed to the proxy, it will update the tree exported
|
|||
to the special remote. When files are copied to the remote via the proxy,
|
||||
it will also update the exported tree.
|
||||
|
||||
Note that proxying for a special remote can use disk space
|
||||
in the repository, to temporarily hold the content of annexed files.
|
||||
|
||||
# OPTIONS
|
||||
|
||||
* The [[git-annex-common-options]](1) can be used.
|
||||
|
|
6
doc/special_remotes/external/example.sh
vendored
6
doc/special_remotes/external/example.sh
vendored
|
@ -222,6 +222,12 @@ while read line; do
|
|||
;;
|
||||
esac
|
||||
;;
|
||||
GETORDERED)
|
||||
# This remote writes to files in order when
|
||||
# retrieving them. If it didn't, it
|
||||
# would be important to respond with UNORDERED.
|
||||
echo ORDERED
|
||||
;;
|
||||
CHECKPRESENT)
|
||||
key="$2"
|
||||
calclocation "$key"
|
||||
|
|
|
@ -30,6 +30,12 @@ Planned schedule of work:
|
|||
|
||||
* Currently working on streaming download via proxy from special remote.
|
||||
|
||||
## completed items for October's work on streaming through proxy to special remotes
|
||||
|
||||
* Stream downloads through proxy for all special remotes that indicate
|
||||
they download in order.
|
||||
* Added ORDERED message to external special remote protocol.
|
||||
|
||||
## completed items for September's work on proving behavior of preferred content
|
||||
|
||||
* Static analysis to detect "not present", "not balanced", and similar
|
||||
|
|
Loading…
Reference in a new issue