Added http special remote, which is useful for accessing other remotes that publish content stored in them via http/https.
With automatic layout learning!
This commit is contained in:
parent
fccc9ab442
commit
571ec900ac
15 changed files with 287 additions and 5 deletions
|
@ -59,7 +59,7 @@ branchHashDir = hashDirLower . branchHashLevels
|
||||||
- which do not allow using a directory "XX" when "xx" already exists.
|
- which do not allow using a directory "XX" when "xx" already exists.
|
||||||
- To support that, some git-annex repositories use the lower case-hash.
|
- To support that, some git-annex repositories use the lower case-hash.
|
||||||
- All special remotes use the lower-case hash for new data, but old data
|
- All special remotes use the lower-case hash for new data, but old data
|
||||||
- may still used the mixed case hash. -}
|
- may still use the mixed case hash. -}
|
||||||
dirHashes :: [HashLevels -> Hasher]
|
dirHashes :: [HashLevels -> Hasher]
|
||||||
dirHashes = [hashDirLower, hashDirMixed]
|
dirHashes = [hashDirLower, hashDirMixed]
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
git-annex (8.20200815) UNRELEASED; urgency=medium
|
git-annex (8.20200815) UNRELEASED; urgency=medium
|
||||||
|
|
||||||
|
* Added http special remote, which is useful for accessing other remotes
|
||||||
|
that publish content stored in them via http/https.
|
||||||
* The external special remote protocol got an ASYNC extension.
|
* The external special remote protocol got an ASYNC extension.
|
||||||
This can be used by an external special remote to let a single process
|
This can be used by an external special remote to let a single process
|
||||||
perform concurrent actions, rather than multiple processes being
|
perform concurrent actions, rather than multiple processes being
|
||||||
|
|
181
Remote/Http.hs
Normal file
181
Remote/Http.hs
Normal file
|
@ -0,0 +1,181 @@
|
||||||
|
{- Http remote (readonly).
|
||||||
|
-
|
||||||
|
- Copyright 2020 Joey Hess <id@joeyh.name>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Remote.Http (remote) where
|
||||||
|
|
||||||
|
import Annex.Common
|
||||||
|
import Types.Remote
|
||||||
|
import Types.ProposedAccepted
|
||||||
|
import Remote.Helper.Messages
|
||||||
|
import Remote.Helper.ExportImport
|
||||||
|
import Remote.Helper.Special
|
||||||
|
import qualified Git
|
||||||
|
import Annex.Content
|
||||||
|
import Config.Cost
|
||||||
|
import Config
|
||||||
|
import Logs.Web
|
||||||
|
import Creds
|
||||||
|
import Utility.Metered
|
||||||
|
import qualified Annex.Url as Url
|
||||||
|
import Annex.SpecialRemote.Config
|
||||||
|
|
||||||
|
import qualified Data.Map as M
|
||||||
|
import System.FilePath.Posix as P
|
||||||
|
import Control.Concurrent.STM
|
||||||
|
|
||||||
|
remote :: RemoteType
|
||||||
|
remote = RemoteType
|
||||||
|
{ typename = "http"
|
||||||
|
, enumerate = const (findSpecialRemotes "http")
|
||||||
|
, generate = gen
|
||||||
|
, configParser = mkRemoteConfigParser
|
||||||
|
[ optionalStringParser urlField
|
||||||
|
(FieldDesc "(required) url to the remote content")
|
||||||
|
]
|
||||||
|
, setup = httpSetup
|
||||||
|
, exportSupported = exportUnsupported
|
||||||
|
, importSupported = importUnsupported
|
||||||
|
}
|
||||||
|
|
||||||
|
urlField :: RemoteConfigField
|
||||||
|
urlField = Accepted "url"
|
||||||
|
|
||||||
|
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
|
||||||
|
gen r u rc gc rs = do
|
||||||
|
c <- parsedRemoteConfig remote rc
|
||||||
|
cst <- remoteCost gc expensiveRemoteCost
|
||||||
|
let url = getRemoteConfigValue urlField c
|
||||||
|
ll <- liftIO newLearnedLayout
|
||||||
|
return $ Just $ this url ll c cst
|
||||||
|
where
|
||||||
|
this url ll c cst = Remote
|
||||||
|
{ uuid = u
|
||||||
|
, cost = cst
|
||||||
|
, name = Git.repoDescribe r
|
||||||
|
, storeKey = uploadKey
|
||||||
|
, retrieveKeyFile = downloadKey url ll
|
||||||
|
, retrieveKeyFileCheap = Nothing
|
||||||
|
-- HttpManagerRestricted is used here, so this is
|
||||||
|
-- secure.
|
||||||
|
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
||||||
|
, removeKey = dropKey
|
||||||
|
, lockContent = Nothing
|
||||||
|
, checkPresent = checkKey url ll (this url ll c cst)
|
||||||
|
, checkPresentCheap = False
|
||||||
|
, exportActions = exportUnsupported
|
||||||
|
, importActions = importUnsupported
|
||||||
|
, whereisKey = Nothing
|
||||||
|
, remoteFsck = Nothing
|
||||||
|
, repairRepo = Nothing
|
||||||
|
, config = c
|
||||||
|
, gitconfig = gc
|
||||||
|
, localpath = Nothing
|
||||||
|
, getRepo = return r
|
||||||
|
, readonly = True
|
||||||
|
, appendonly = False
|
||||||
|
, availability = GloballyAvailable
|
||||||
|
, remotetype = remote
|
||||||
|
, mkUnavailable = return Nothing
|
||||||
|
, getInfo = return []
|
||||||
|
, claimUrl = Nothing
|
||||||
|
, checkUrl = Nothing
|
||||||
|
, remoteStateHandle = rs
|
||||||
|
}
|
||||||
|
|
||||||
|
httpSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
|
||||||
|
httpSetup _ Nothing _ _ _ =
|
||||||
|
error "Must use --sameas when initializing a http remote."
|
||||||
|
httpSetup _ (Just u) _ c gc = do
|
||||||
|
_url <- maybe (giveup "Specify url=")
|
||||||
|
(return . fromProposedAccepted)
|
||||||
|
(M.lookup urlField c)
|
||||||
|
(c', _encsetup) <- encryptionSetup c gc
|
||||||
|
gitConfigSpecialRemote u c' [("http", "true")]
|
||||||
|
return (c', u)
|
||||||
|
|
||||||
|
downloadKey :: Maybe URLString -> LearnedLayout -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Verification
|
||||||
|
downloadKey baseurl ll key _af dest p = do
|
||||||
|
unlessM (urlAction baseurl ll key go) $
|
||||||
|
giveup "download failed"
|
||||||
|
return UnVerified
|
||||||
|
where
|
||||||
|
go url = Url.withUrlOptions $ downloadUrl key p [url] dest
|
||||||
|
|
||||||
|
uploadKey :: Key -> AssociatedFile -> MeterUpdate -> Annex ()
|
||||||
|
uploadKey _ _ _ = giveup "upload to http special remote not supported"
|
||||||
|
|
||||||
|
dropKey :: Key -> Annex ()
|
||||||
|
dropKey _ = giveup "removal from http special remote not supported"
|
||||||
|
|
||||||
|
checkKey :: Maybe URLString -> LearnedLayout -> Remote -> Key -> Annex Bool
|
||||||
|
checkKey baseurl ll r key = do
|
||||||
|
showChecking r
|
||||||
|
urlAction baseurl ll key $ \url ->
|
||||||
|
Url.withUrlOptions $ Url.checkBoth url (fromKey keySize key)
|
||||||
|
|
||||||
|
type LearnedLayout = TVar (Maybe [Key -> URLString])
|
||||||
|
|
||||||
|
newLearnedLayout :: IO LearnedLayout
|
||||||
|
newLearnedLayout = newTVarIO Nothing
|
||||||
|
|
||||||
|
-- Learns which layout the special remote uses, so that once any
|
||||||
|
-- action on an url succeeds, subsequent calls will continue to use that
|
||||||
|
-- layout (or related layouts).
|
||||||
|
urlAction :: Maybe URLString -> LearnedLayout -> Key -> (URLString -> Annex Bool) -> Annex Bool
|
||||||
|
urlAction (Just baseurl) ll key a = liftIO (readTVarIO ll) >>= \case
|
||||||
|
Just learned -> go False [learned]
|
||||||
|
Nothing -> go True (supportedLayouts baseurl)
|
||||||
|
where
|
||||||
|
go _learn [] = return False
|
||||||
|
go learn (layouts:rest) = go' learn layouts [] <||> go learn rest
|
||||||
|
|
||||||
|
go' _ [] _ = return False
|
||||||
|
go' learn (layout:rest) prevs =
|
||||||
|
ifM (a (layout key))
|
||||||
|
( do
|
||||||
|
when learn $ do
|
||||||
|
let learned = layout:prevs++rest
|
||||||
|
liftIO $ atomically $
|
||||||
|
writeTVar ll (Just learned)
|
||||||
|
return True
|
||||||
|
, go' learn rest (layout:prevs)
|
||||||
|
)
|
||||||
|
|
||||||
|
-- cannot normally happen
|
||||||
|
urlAction Nothing _ _ _ = giveup "no url configured for http special remote"
|
||||||
|
|
||||||
|
-- Different ways that keys can be laid out in the special remote,
|
||||||
|
-- with the more common first.
|
||||||
|
--
|
||||||
|
-- This is a nested list, because a single remote may use more than one
|
||||||
|
-- layout. In particular, old versions of git-annex used hashDirMixed
|
||||||
|
-- for some special remotes, before switching to hashDirLower for new data.
|
||||||
|
-- So, when learning the layout, both need to be tried.
|
||||||
|
supportedLayouts :: URLString -> [[Key -> URLString]]
|
||||||
|
supportedLayouts baseurl =
|
||||||
|
-- Layout used for bare git-annex repos, and for many
|
||||||
|
-- special remotes like directory.
|
||||||
|
[ [ \k -> mkurl k (hashDirLower (HashLevels 2)) P.</> kf k
|
||||||
|
-- Layout used for non-bare git-annex repos, and for some old
|
||||||
|
-- special remotes.
|
||||||
|
, \k -> mkurl k (hashDirMixed (HashLevels 2)) P.</> kf k
|
||||||
|
]
|
||||||
|
-- Special remotes that do not need hash directories.
|
||||||
|
, [ \k -> baseurl P.</> kf k ]
|
||||||
|
-- Layouts without a key directory, used by some special remotes.
|
||||||
|
, [ \k -> mkurl k (hashDirLower def)
|
||||||
|
, \k -> mkurl k (hashDirMixed def)
|
||||||
|
]
|
||||||
|
-- Layouts with only 1 level of hash directory,
|
||||||
|
-- rather than the default 2.
|
||||||
|
, [ \k -> mkurl k (hashDirLower (HashLevels 1))
|
||||||
|
, \k -> mkurl k (hashDirMixed (HashLevels 1))
|
||||||
|
]
|
||||||
|
]
|
||||||
|
where
|
||||||
|
mkurl k hasher = baseurl P.</> fromRawFilePath (hasher k) P.</> kf k
|
||||||
|
kf k = fromRawFilePath (keyFile k)
|
|
@ -41,6 +41,7 @@ import qualified Remote.Tahoe
|
||||||
import qualified Remote.Glacier
|
import qualified Remote.Glacier
|
||||||
import qualified Remote.Ddar
|
import qualified Remote.Ddar
|
||||||
import qualified Remote.GitLFS
|
import qualified Remote.GitLFS
|
||||||
|
import qualified Remote.Http
|
||||||
import qualified Remote.Hook
|
import qualified Remote.Hook
|
||||||
import qualified Remote.External
|
import qualified Remote.External
|
||||||
|
|
||||||
|
@ -65,6 +66,7 @@ remoteTypes = map adjustExportImportRemoteType
|
||||||
, Remote.Glacier.remote
|
, Remote.Glacier.remote
|
||||||
, Remote.Ddar.remote
|
, Remote.Ddar.remote
|
||||||
, Remote.GitLFS.remote
|
, Remote.GitLFS.remote
|
||||||
|
, Remote.Http.remote
|
||||||
, Remote.Hook.remote
|
, Remote.Hook.remote
|
||||||
, Remote.External.remote
|
, Remote.External.remote
|
||||||
]
|
]
|
||||||
|
|
|
@ -1547,6 +1547,11 @@ Remotes are configured using these settings in `.git/config`.
|
||||||
|
|
||||||
It is set to "true" if this is a git-lfs remote.
|
It is set to "true" if this is a git-lfs remote.
|
||||||
|
|
||||||
|
* `remote.<name>.annex-http`
|
||||||
|
|
||||||
|
Used to identify http special remotes.
|
||||||
|
Normally this is automatically set up by `git annex initremote`.
|
||||||
|
|
||||||
* `remote.<name>.annex-externaltype`
|
* `remote.<name>.annex-externaltype`
|
||||||
|
|
||||||
Used by external special remotes to record the type of the remote.
|
Used by external special remotes to record the type of the remote.
|
||||||
|
|
|
@ -21,7 +21,7 @@ the git history is not stored in them.
|
||||||
* [[S3]] (Amazon S3, and other compatible services)
|
* [[S3]] (Amazon S3, and other compatible services)
|
||||||
* [[tahoe]]
|
* [[tahoe]]
|
||||||
* [[tor]]
|
* [[tor]]
|
||||||
* [[web]]
|
* [[web]] and [[http]]
|
||||||
* [[webdav]]
|
* [[webdav]]
|
||||||
* [[git]]
|
* [[git]]
|
||||||
* [[xmpp]]
|
* [[xmpp]]
|
||||||
|
|
28
doc/special_remotes/http.mdwn
Normal file
28
doc/special_remotes/http.mdwn
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
This special remote allows downloading annexed objects from other remotes
|
||||||
|
that expose their content by http. Not to be confused with the [[web]]
|
||||||
|
special remote, this one is only useful in combination with some other
|
||||||
|
special remote.
|
||||||
|
|
||||||
|
Suppose, for example, that you have a [[directory]] special remote. And the
|
||||||
|
directory happens to be published by a web server. (Or it could be a
|
||||||
|
[[rsync]] special remote, or many other kinds.) To let git-annex know that
|
||||||
|
the content of this special remote can also be accessed over http, set up
|
||||||
|
a http special remote.
|
||||||
|
|
||||||
|
git annex initremote --sameas=foo foo-http type=http url=http://example.com/foo
|
||||||
|
|
||||||
|
The --sameas parameter tells git-annex what other special remote this http
|
||||||
|
remote is accessing. (See [[tips/multiple_remotes_accessing_the_same_data_store]].)
|
||||||
|
Since the http remote is read-only, it can only be used to download content
|
||||||
|
that is stored in that other remote.
|
||||||
|
|
||||||
|
This special remote is compatible with many, but not all, other special
|
||||||
|
remotes. If the special remote does something unusual with the name
|
||||||
|
a file is stored under, or with how the data is stored, it might not work.
|
||||||
|
See [[tips/multiple_remotes_accessing_the_same_data_store]]
|
||||||
|
for a list of known working combinations.
|
||||||
|
|
||||||
|
## configuration
|
||||||
|
|
||||||
|
* `url` - The http or https url to where the content is stored by the
|
||||||
|
other special remote.
|
|
@ -1,4 +1,5 @@
|
||||||
git-annex can use the WWW as a special remote, downloading urls to files.
|
git-annex can use the WWW as a special remote, associating an url with an
|
||||||
|
annexed file, and downloading the file content from the web.
|
||||||
See [[tips/using_the_web_as_a_special_remote]] for usage examples.
|
See [[tips/using_the_web_as_a_special_remote]] for usage examples.
|
||||||
|
|
||||||
## notes
|
## notes
|
||||||
|
@ -7,5 +8,6 @@ Currently git-annex only supports downloading content from the web;
|
||||||
it cannot upload to it or remove content.
|
it cannot upload to it or remove content.
|
||||||
|
|
||||||
This special remote uses urls on the web as the source for content.
|
This special remote uses urls on the web as the source for content.
|
||||||
git-annex can also download content from a normal git remote, accessible by
|
There are several other ways http can be used to download annexed objects,
|
||||||
http.
|
including a git remote accessible by http, S3 with a `publicurl` configured,
|
||||||
|
and the [[http]] special remote.
|
||||||
|
|
|
@ -55,4 +55,7 @@ If you find combinations that work, please edit this page to list them.
|
||||||
## known working combinations
|
## known working combinations
|
||||||
|
|
||||||
* directory and rsync
|
* directory and rsync
|
||||||
|
* http and directory
|
||||||
|
* http and rsync
|
||||||
|
* http and rclone (any layout except for frankencase)
|
||||||
|
|
||||||
|
|
|
@ -15,3 +15,5 @@ access of other special remotes whose data stores are exposed via http.
|
||||||
|
|
||||||
Call it "http" maybe. (There may be some confusion between this and the web
|
Call it "http" maybe. (There may be some confusion between this and the web
|
||||||
special remote by users looking for such a thing.) --[[Joey]]
|
special remote by users looking for such a thing.) --[[Joey]]
|
||||||
|
|
||||||
|
> http special remote implemented, [[done]] --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 5"""
|
||||||
|
date="2020-09-01T19:00:05Z"
|
||||||
|
content="""
|
||||||
|
I'm implementing this with an automatic learning of the layout that a
|
||||||
|
special remote uses.
|
||||||
|
|
||||||
|
It looks like it will probably be sufficiently fast
|
||||||
|
for it to remember only for the duration of the command, although it would
|
||||||
|
be possible to cache what it's learned in .git/config or something.
|
||||||
|
|
||||||
|
At most, the learning will add 6 extra http requests to learn
|
||||||
|
the most unlikely layout (hashDirMixed with 1 hash level, which I doubt
|
||||||
|
anything actually uses). Since the first file it tries to access is almost
|
||||||
|
certainly present in the special remote, it will then learn the layout and
|
||||||
|
keep using it with no added overhead.
|
||||||
|
|
||||||
|
The unusual case would be if a lot of files are not present in the
|
||||||
|
remote any longer. Then it will fail to learn, on each file, and so will
|
||||||
|
always make 6 extra http requests per file processed. (Reusing the same
|
||||||
|
http connection at least.) This seems unusual enough to not worry about
|
||||||
|
remembering what it's learned for longer than a single run, or making the
|
||||||
|
layout explicitly configurable. Content would have to be dropped from the
|
||||||
|
other special remote and the git-annex branch not be synced up for it to
|
||||||
|
happen.
|
||||||
|
"""]]
|
|
@ -0,0 +1,4 @@
|
||||||
|
The http special remote doesn't currently support being used with a
|
||||||
|
--sameas remote that uses exporttree=yes.
|
||||||
|
|
||||||
|
It seems like this should be fairly easy to implement. --[[Joey]]
|
|
@ -3,3 +3,6 @@
|
||||||
[[!meta author=yoh]]
|
[[!meta author=yoh]]
|
||||||
[[!tag projects/datalad]]
|
[[!tag projects/datalad]]
|
||||||
[[!tag needsthought]]
|
[[!tag needsthought]]
|
||||||
|
|
||||||
|
> [[done]] by implementing another design, not the one suggested here
|
||||||
|
> --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,22 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 6"""
|
||||||
|
date="2020-09-01T19:09:30Z"
|
||||||
|
content="""
|
||||||
|
I've implemented the http special remote, that can be combined with other
|
||||||
|
special remotes to access them using anonymous http.
|
||||||
|
|
||||||
|
I think that probably addresses this todo well enough to close it.
|
||||||
|
(Although I didn't get around to
|
||||||
|
[[todo/make_http_special_remote_support_exporttree_remotes]] yet, and this
|
||||||
|
todo mentions supporting exporttree. Should be easy to add later though.)
|
||||||
|
|
||||||
|
There are probably some special remotes that are unusual enough that the
|
||||||
|
http special remote can't support them, which it would make sense to add a
|
||||||
|
publicurl= config to, like S3 has. (Although I think S3 itself could now be
|
||||||
|
used with the http special remote so its option is vestigial now.)
|
||||||
|
|
||||||
|
I guess that publicurl= config would best be added to the individual
|
||||||
|
special remote, so it doesn't need any particular support in git-annex to
|
||||||
|
add it.
|
||||||
|
"""]]
|
|
@ -980,6 +980,7 @@ Executable git-annex
|
||||||
Remote.Helper.ReadOnly
|
Remote.Helper.ReadOnly
|
||||||
Remote.Helper.Special
|
Remote.Helper.Special
|
||||||
Remote.Helper.Ssh
|
Remote.Helper.Ssh
|
||||||
|
Remote.Http
|
||||||
Remote.Hook
|
Remote.Hook
|
||||||
Remote.List
|
Remote.List
|
||||||
Remote.List.Util
|
Remote.List.Util
|
||||||
|
|
Loading…
Add table
Reference in a new issue