2014-01-08 20:14:37 +00:00
|
|
|
{- Tahoe-LAFS special remotes.
 -
 - Tahoe capabilities for accessing objects stored in the remote
 - are preserved in the remote state log.
 -
 - In order to allow multiple clones of a repository to access the same
 - tahoe repository, git-annex needs to store the introducer furl,
 - and the shared-convergence-secret. These are stored in the remote
 - configuration, when embedcreds is enabled.
 -
 - Using those creds, git-annex sets up a tahoe configuration directory in
 - ~/.tahoe-git-annex/UUID/
 -
 - Tahoe has its own encryption, so git-annex's encryption is not used.
 -
 - Copyright 2014-2020 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}
|
|
|
|
|
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
|
|
|
|
module Remote.Tahoe (remote) where
|
|
|
|
|
|
|
|
import qualified Data.Map as M
|
Fix mangling of --json output of utf-8 characters when not running in a utf-8 locale
As long as all code imports Utility.Aeson rather than Data.Aeson,
and no Strings that may contain utf-8 characters are used for eg, object
keys via T.pack, this is guaranteed to fix the problem everywhere that
git-annex generates json.
It's kind of annoying to need to wrap ToJSON with a ToJSON', especially
since every data type that has a ToJSON instance has to be ported over.
However, that only took 50 lines of code, which is worth it to ensure full
coverage. I initially tried an alternative approach of a newtype FileEncoded,
which had to be used everywhere a String was fed into aeson, and chasing
down all the sites would have been far too hard. Did consider creating an
intentionally overlapping instance ToJSON String, and letting ghc fail
to build anything that passed in a String, but am not sure that wouldn't
pollute some library that git-annex depends on that happens to use ToJSON
String internally.
This commit was supported by the NSF-funded DataLad project.
2018-04-16 19:42:45 +00:00
|
|
|
import Utility.Aeson
|
2014-01-08 20:14:37 +00:00
|
|
|
import Data.ByteString.Lazy.UTF8 (fromString)
|
2014-01-08 23:17:18 +00:00
|
|
|
import Control.Concurrent.STM
|
2014-01-08 20:14:37 +00:00
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2014-01-08 20:14:37 +00:00
|
|
|
import Types.Remote
|
2014-02-11 18:06:50 +00:00
|
|
|
import Types.Creds
|
2020-01-10 18:10:20 +00:00
|
|
|
import Types.ProposedAccepted
|
2020-06-26 18:23:21 +00:00
|
|
|
import Types.NumCopies
|
2014-01-08 20:14:37 +00:00
|
|
|
import qualified Git
|
|
|
|
import Config
|
|
|
|
import Config.Cost
|
2020-01-10 18:10:20 +00:00
|
|
|
import Annex.SpecialRemote.Config
|
2014-01-08 20:14:37 +00:00
|
|
|
import Remote.Helper.Special
|
2019-02-20 19:55:01 +00:00
|
|
|
import Remote.Helper.ExportImport
|
2014-01-08 20:14:37 +00:00
|
|
|
import Annex.UUID
|
|
|
|
import Annex.Content
|
|
|
|
import Logs.RemoteState
|
|
|
|
import Utility.UserInfo
|
|
|
|
import Utility.Metered
|
|
|
|
import Utility.Env
|
2014-01-08 23:58:47 +00:00
|
|
|
import Utility.ThreadScheduler
|
2014-01-08 20:14:37 +00:00
|
|
|
|
2014-01-08 23:17:18 +00:00
|
|
|
{- A handle on a tahoe configuration directory, paired with a TMVar that
 - is left empty until tahoe has been verified to be running. -}
data TahoeHandle = TahoeHandle TahoeConfigDir (TMVar ())
|
|
|
|
|
2014-01-08 20:14:37 +00:00
|
|
|
-- Path to a tahoe client configuration directory.
type TahoeConfigDir = FilePath

-- The shared convergence secret, as read from tahoe's convergence file.
type SharedConvergenceSecret = String

-- The furl used to connect to the tahoe introducer.
type IntroducerFurl = String

-- A tahoe capability string, which is what is needed to access an
-- object stored in tahoe.
type Capability = String
|
|
|
|
|
|
|
|
-- | The tahoe special remote type. Does not support export/import,
-- and tahoe handles its own encryption, so this is a special remote
-- without git-annex encryption.
remote :: RemoteType
remote = specialRemoteType $ RemoteType
	{ typename = "tahoe"
	, enumerate = const (findSpecialRemotes "tahoe")
	, generate = gen
	, configParser = mkRemoteConfigParser
		[ optionalStringParser scsField
			(FieldDesc "optional, normally a unique one is generated")
		-- The furl is a credential, so not displayed.
		, optionalStringParser furlField HiddenField
		]
	, setup = tahoeSetup
	, exportSupported = exportUnsupported
	, importSupported = importUnsupported
	, thirdPartyPopulated = False
	}
|
2014-01-08 20:14:37 +00:00
|
|
|
|
2020-01-14 19:41:34 +00:00
|
|
|
-- Remote configuration field holding the shared convergence secret.
scsField :: RemoteConfigField
scsField = Accepted "shared-convergence-secret"

-- Remote configuration field holding the introducer furl.
furlField :: RemoteConfigField
furlField = Accepted "introducer-furl"
|
|
|
|
|
fix encryption of content to gcrypt and git-lfs
Fix serious regression in gcrypt and encrypted git-lfs remotes.
Since version 7.20200202.7, git-annex incorrectly stored content
on those remotes without encrypting it.
Problem was, Remote.Git enumerates all git remotes, including git-lfs
and gcrypt. It then dispatches to those. So, Remote.List used the
RemoteConfigParser from Remote.Git, instead of from git-lfs or gcrypt,
and that parser does not know about encryption fields, so did not
include them in the ParsedRemoteConfig. (Also didn't include other
fields specific to those remotes, perhaps chunking etc also didn't
get through.)
To fix, had to move RemoteConfig parsing down into the generate methods
of each remote, rather than doing it in Remote.List.
And a consequence of that was that ParsedRemoteConfig had to change to
include the RemoteConfig that got parsed, so that testremote can
generate a new remote based on an existing remote.
(I would have rather fixed this just inside Remote.Git, but that was not
practical, at least not w/o re-doing work that Remote.List already did.
Big ugly mostly mechanical patch seemed preferable to making git-annex
slower.)
2020-02-26 21:20:56 +00:00
|
|
|
-- | Generates a Remote for a tahoe special remote.
--
-- The TahoeHandle's TMVar starts out empty; the tahoe daemon is only
-- started when something first uses the handle.
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r u rc gc rs = do
	c <- parsedRemoteConfig remote rc
	cst <- remoteCost gc c expensiveRemoteCost
	hdl <- liftIO $ TahoeHandle
		-- remoteAnnexTahoe gc can override the default
		-- tahoe configuration directory location.
		<$> maybe (defaultTahoeConfigDir u) return (remoteAnnexTahoe gc)
		<*> newEmptyTMVarIO
	return $ Just $ Remote
		{ uuid = u
		, cost = cst
		, name = Git.repoDescribe r
		, storeKey = store rs hdl
		, retrieveKeyFile = retrieve rs hdl
		, retrieveKeyFileCheap = Nothing
		-- Tahoe cryptographically verifies content.
		, retrievalSecurityPolicy = RetrievalAllKeysSecure
		, removeKey = remove
		, lockContent = Just $ lockKey u rs hdl
		, checkPresent = checkKey rs hdl
		, checkPresentCheap = False
		, exportActions = exportUnsupported
		, importActions = importUnsupported
		, whereisKey = Just (getWhereisKey rs)
		, remoteFsck = Nothing
		, repairRepo = Nothing
		, config = c
		, getRepo = return r
		, gitconfig = gc
		, localpath = Nothing
		, readonly = False
		, appendonly = False
		, untrustworthy = False
		, availability = pure GloballyAvailable
		, remotetype = remote
		, mkUnavailable = return Nothing
		, getInfo = return []
		, claimUrl = Nothing
		, checkUrl = Nothing
		, remoteStateHandle = rs
		}
|
2014-01-08 20:14:37 +00:00
|
|
|
|
2017-02-07 18:35:58 +00:00
|
|
|
-- | Sets up a tahoe special remote.
--
-- The introducer furl is taken from the TAHOE_FURL environment variable
-- when set, otherwise from the remote configuration; it's an error if
-- neither provides one. Runs tahoe configuration to get the shared
-- convergence secret, and embeds furl and secret into the remote
-- configuration only when embedcreds is enabled.
tahoeSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
tahoeSetup _ mu _ c _ = do
	furl <- maybe (fromMaybe missingfurl $ M.lookup furlField c) Proposed
		<$> liftIO (getEnv "TAHOE_FURL")
	u <- maybe (liftIO genUUID) return mu
	configdir <- liftIO $ defaultTahoeConfigDir u
	scs <- liftIO $ tahoeConfigure configdir
		(fromProposedAccepted furl)
		(fromProposedAccepted <$> (M.lookup scsField c))
	pc <- either giveup return . parseRemoteConfig c =<< configParser remote c
	-- Only store the creds in the remote configuration when
	-- embedcreds is enabled.
	let c' = if embedCreds pc
		then flip M.union c $ M.fromList
			[ (furlField, furl)
			, (scsField, Proposed scs)
			]
		else c
	gitConfigSpecialRemote u c' [("tahoe", configdir)]
	return (c', u)
  where
	missingfurl = giveup "Set TAHOE_FURL to the introducer furl to use."
|
2014-01-08 20:14:37 +00:00
|
|
|
|
2024-07-01 14:42:27 +00:00
|
|
|
-- | Stores a key in tahoe, using tahoe put, and preserves the
-- capability it outputs in the remote state log.
store :: RemoteStateHandle -> TahoeHandle -> Key -> AssociatedFile -> Maybe FilePath -> MeterUpdate -> Annex ()
store rs hdl k _af o _p = sendAnnex k o noop $ \src _sz ->
	parsePut <$> liftIO (readTahoe hdl "put" [File src]) >>= maybe
		(giveup "tahoe failed to store content")
		(\cap -> storeCapability rs k cap)
|
2014-01-08 20:14:37 +00:00
|
|
|
|
2021-08-17 16:41:36 +00:00
|
|
|
-- | Retrieves a key's content from tahoe, using the capability
-- preserved in the remote state log. Fails when no capability is known
-- for the key.
retrieve :: RemoteStateHandle -> TahoeHandle -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
retrieve rs hdl k _f d _p _ = do
	go =<< getCapability rs k
	-- Tahoe verifies the content it retrieves using cryptographically
	-- secure methods.
	return Verified
  where
	go Nothing = giveup "tahoe capability is not known"
	go (Just cap) = unlessM (liftIO $ requestTahoe hdl "get" [Param cap, File d]) $
		-- Fixed typo in error message ("reteieve").
		giveup "tahoe failed to retrieve content"
|
2014-01-08 20:14:37 +00:00
|
|
|
|
toward SafeDropProof expiry checking
Added Maybe POSIXTime to SafeDropProof, which gets set when the proof is
based on a LockedCopy. If there are several LockedCopies, it uses the
closest expiry time. That is not optimal, it may be that the proof
expires based on one LockedCopy but another one has not expired. But
that seems unlikely to really happen, and anyway the user can just
re-run a drop if it fails due to expiry.
Pass the SafeDropProof to removeKey, which is responsible for checking
it for expiry in situations where that could be a problem. Which really
only means in Remote.Git.
Made Remote.Git check expiry when dropping from a local remote.
Checking expiry when dropping from a P2P remote is not yet implemented.
P2P.Protocol.remove has SafeDropProof plumbed through to it for that
purpose.
Fixing the remaining 2 build warnings should complete this work.
Note that the use of a POSIXTime here means that if the clock gets set
forward while git-annex is in the middle of a drop, it may say that
dropping took too long. That seems ok. Less ok is that if the clock gets
turned back a sufficient amount (eg 5 minutes), proof expiry won't be
noticed. It might be better to use the Monotonic clock, but that doesn't
advance when a laptop is suspended, and while there is the linux
Boottime clock, that is not available on other systems. Perhaps a
combination of POSIXTime and the Monotonic clock could detect laptop
suspension and also detect clock being turned back?
There is a potential future flag day where
p2pDefaultLockContentRetentionDuration is not assumed, but is probed
using the P2P protocol, and peers that don't support it can no longer
produce a LockedCopy. Until that happens, when git-annex is
communicating with older peers there is a risk of data loss when
a ssh connection closes during LOCKCONTENT.
2024-07-04 16:23:46 +00:00
|
|
|
-- | Content can never be removed from tahoe, so this always fails.
remove :: Maybe SafeDropProof -> Key -> Annex ()
remove _ _ = giveup "content cannot be removed from tahoe remote"
|
2014-01-08 20:14:37 +00:00
|
|
|
|
2020-06-26 18:23:21 +00:00
|
|
|
-- Since content cannot be removed from tahoe (by git-annex),
-- nothing needs to be done to lock content there, except for checking that
-- it is actually present.
lockKey :: UUID -> RemoteStateHandle -> TahoeHandle -> Key -> (VerifiedCopy -> Annex a) -> Annex a
-- Renamed local parameter hrl to hdl for consistency with the rest of
-- this file's naming of TahoeHandle values.
lockKey u rs hdl k callback =
	ifM (checkKey rs hdl k)
		( withVerifiedCopy LockedCopy u (return (Right True)) callback
		, giveup "content seems to be missing from tahoe remote"
		)
|
|
|
|
|
add RemoteStateHandle
This solves the problem of sameas remotes trampling over per-remote
state. Used for:
* per-remote state, of course
* per-remote metadata, also of course
* per-remote content identifiers, because two remote implementations
could in theory generate the same content identifier for two different
peices of content
While chunk logs are per-remote data, they don't use this, because the
number and size of chunks stored is a common property across sameas
remotes.
External special remote had a complication, where it was theoretically
possible for a remote to send SETSTATE or GETSTATE during INITREMOTE or
EXPORTSUPPORTED. Since the uuid of the remote is typically generate in
Remote.setup, it would only be possible to pass a Maybe
RemoteStateHandle into it, and it would otherwise have to construct its
own. Rather than go that route, I decided to send an ERROR in this case.
It seems unlikely that any existing external special remote will be
affected. They would have to make up a git-annex key, and set state for
some reason during INITREMOTE. I can imagine such a hack, but it doesn't
seem worth complicating the code in such an ugly way to support it.
Unfortunately, both TestRemote and Annex.Import needed the Remote
to have a new field added that holds its RemoteStateHandle.
2019-10-14 16:33:27 +00:00
|
|
|
-- | Checks if a key is present in tahoe, by running tahoe check on the
-- capability preserved for it. When no capability is known, the key
-- cannot be present.
checkKey :: RemoteStateHandle -> TahoeHandle -> Key -> Annex Bool
checkKey rs hdl k = maybe (return False) check =<< getCapability rs k
  where
	check cap = liftIO $ do
		out <- readTahoe hdl "check"
			[ Param "--raw"
			, Param cap
			]
		either giveup return (parseCheck out)
|
2014-01-08 20:14:37 +00:00
|
|
|
|
|
|
|
-- | The default tahoe configuration directory for a remote's UUID,
-- located under the user's home directory.
defaultTahoeConfigDir :: UUID -> IO TahoeConfigDir
defaultTahoeConfigDir u = do
	home <- myHomeDir
	return (home </> ".tahoe-git-annex" </> fromUUID u)
|
2014-01-08 20:14:37 +00:00
|
|
|
|
|
|
|
-- | Sets up a tahoe client configuration directory: creates the client,
-- writes the shared convergence secret when one was provided, starts
-- the daemon, and returns the convergence secret the daemon wrote.
tahoeConfigure :: TahoeConfigDir -> IntroducerFurl -> Maybe SharedConvergenceSecret -> IO SharedConvergenceSecret
tahoeConfigure configdir furl mscs = do
	unlessM (createClient configdir furl) $
		giveup "tahoe create-client failed"
	maybe noop (writeSharedConvergenceSecret configdir) mscs
	startTahoeDaemon configdir
	getSharedConvergenceSecret configdir
|
|
|
|
|
|
|
|
-- | Runs tahoe create-client to initialize the configuration directory,
-- returning False when it fails.
createClient :: TahoeConfigDir -> IntroducerFurl -> IO Bool
createClient configdir furl = do
	-- Make sure the parent directory exists; tahoe itself
	-- creates the configdir.
	createDirectoryIfMissing True $
		fromRawFilePath $ parentDir $ toRawFilePath configdir
	boolTahoe configdir "create-client"
		[ Param "--nickname", Param "git-annex"
		, Param "--introducer", Param furl
		]
|
|
|
|
|
|
|
|
-- | Writes the convergence secret to the file where tahoe looks for
-- it, terminated by a newline.
writeSharedConvergenceSecret :: TahoeConfigDir -> SharedConvergenceSecret -> IO ()
writeSharedConvergenceSecret configdir scs =
	writeFile (convergenceFile configdir) (scs ++ "\n")
|
|
|
|
|
|
|
|
{- The tahoe daemon writes the convergenceFile shortly after it starts
 - (it does not need to connect to the network). So, try repeatedly to read
 - the file, for up to 1 minute. To avoid reading a partially written
 - file, look for the newline after the value. -}
getSharedConvergenceSecret :: TahoeConfigDir -> IO SharedConvergenceSecret
getSharedConvergenceSecret configdir = go (60 :: Int)
  where
	f = convergenceFile configdir
	go n
		| n == 0 = giveup $ "tahoe did not write " ++ f ++ " after 1 minute. Perhaps the daemon failed to start?"
		| otherwise = do
			v <- catchMaybeIO (readFile f)
			case v of
				-- Only accept the value once its trailing
				-- newline is present, to avoid reading a
				-- partially written file.
				Just s | "\n" `isSuffixOf` s || "\r" `isSuffixOf` s ->
					return $ takeWhile (`notElem` ("\n\r" :: String)) s
				_ -> do
					threadDelaySeconds (Seconds 1)
					go (n - 1)
|
2014-01-08 20:14:37 +00:00
|
|
|
|
|
|
|
-- | The file where tahoe stores the shared convergence secret.
convergenceFile :: TahoeConfigDir -> FilePath
convergenceFile configdir = configdir </> "private" </> "convergence"
|
|
|
|
|
|
|
|
-- | Runs tahoe start. The exit status is discarded;
-- NOTE(review): presumably because it is ok for the daemon to already
-- be running — confirm.
startTahoeDaemon :: TahoeConfigDir -> IO ()
startTahoeDaemon configdir = void $ boolTahoe configdir "start" []
|
|
|
|
|
2014-01-08 23:17:18 +00:00
|
|
|
{- Ensures that tahoe has been started, before running an action
 - that uses it. -}
withTahoeConfigDir :: TahoeHandle -> (TahoeConfigDir -> IO a) -> IO a
withTahoeConfigDir (TahoeHandle configdir v) a = do
	-- The TMVar is filled the first time through, so the daemon
	-- is only started once per handle.
	firstrun <- atomically $ ifM (isEmptyTMVar v)
		( putTMVar v () >> return True
		, return False
		)
	if firstrun
		then do
			startTahoeDaemon configdir
			a configdir
		else a configdir
|
|
|
|
|
2014-01-08 20:14:37 +00:00
|
|
|
-- | Runs a tahoe subcommand, returning False when it fails.
boolTahoe :: TahoeConfigDir -> String -> [CommandParam] -> IO Bool
boolTahoe configdir command params = boolSystem "tahoe" $
	tahoeParams configdir command params
|
|
|
|
|
2014-01-08 23:17:18 +00:00
|
|
|
{- Runs a tahoe command that requests the daemon do something. -}
-- The daemon is started first if necessary.
requestTahoe :: TahoeHandle -> String -> [CommandParam] -> IO Bool
requestTahoe hdl command params = withTahoeConfigDir hdl $ \configdir ->
	boolTahoe configdir command params
|
|
|
|
|
|
|
|
{- Runs a tahoe command that requests the daemon output something. -}
-- Any failure to run the command results in "" being returned.
readTahoe :: TahoeHandle -> String -> [CommandParam] -> IO String
readTahoe hdl command params = withTahoeConfigDir hdl $ \configdir ->
	catchDefaultIO "" $
		readProcess "tahoe" $ toCommand $
			tahoeParams configdir command params
|
2014-01-08 20:14:37 +00:00
|
|
|
|
|
|
|
-- | Builds the full parameter list for running a tahoe subcommand
-- against the given configuration directory.
tahoeParams :: TahoeConfigDir -> String -> [CommandParam] -> [CommandParam]
tahoeParams configdir command params =
	[ Param "-d", File configdir, Param command ] ++ params
|
2014-01-08 20:14:37 +00:00
|
|
|
|
add RemoteStateHandle
This solves the problem of sameas remotes trampling over per-remote
state. Used for:
* per-remote state, of course
* per-remote metadata, also of course
* per-remote content identifiers, because two remote implementations
could in theory generate the same content identifier for two different
peices of content
While chunk logs are per-remote data, they don't use this, because the
number and size of chunks stored is a common property across sameas
remotes.
External special remote had a complication, where it was theoretically
possible for a remote to send SETSTATE or GETSTATE during INITREMOTE or
EXPORTSUPPORTED. Since the uuid of the remote is typically generate in
Remote.setup, it would only be possible to pass a Maybe
RemoteStateHandle into it, and it would otherwise have to construct its
own. Rather than go that route, I decided to send an ERROR in this case.
It seems unlikely that any existing external special remote will be
affected. They would have to make up a git-annex key, and set state for
some reason during INITREMOTE. I can imagine such a hack, but it doesn't
seem worth complicating the code in such an ugly way to support it.
Unfortunately, both TestRemote and Annex.Import needed the Remote
to have a new field added that holds its RemoteStateHandle.
2019-10-14 16:33:27 +00:00
|
|
|
-- | Preserves the capability for a key in the remote state log.
storeCapability :: RemoteStateHandle -> Key -> Capability -> Annex ()
storeCapability = setRemoteState
|
2014-01-08 20:14:37 +00:00
|
|
|
|
add RemoteStateHandle
This solves the problem of sameas remotes trampling over per-remote
state. Used for:
* per-remote state, of course
* per-remote metadata, also of course
* per-remote content identifiers, because two remote implementations
could in theory generate the same content identifier for two different
peices of content
While chunk logs are per-remote data, they don't use this, because the
number and size of chunks stored is a common property across sameas
remotes.
External special remote had a complication, where it was theoretically
possible for a remote to send SETSTATE or GETSTATE during INITREMOTE or
EXPORTSUPPORTED. Since the uuid of the remote is typically generate in
Remote.setup, it would only be possible to pass a Maybe
RemoteStateHandle into it, and it would otherwise have to construct its
own. Rather than go that route, I decided to send an ERROR in this case.
It seems unlikely that any existing external special remote will be
affected. They would have to make up a git-annex key, and set state for
some reason during INITREMOTE. I can imagine such a hack, but it doesn't
seem worth complicating the code in such an ugly way to support it.
Unfortunately, both TestRemote and Annex.Import needed the Remote
to have a new field added that holds its RemoteStateHandle.
2019-10-14 16:33:27 +00:00
|
|
|
-- | Looks up the capability preserved for a key, if any.
getCapability :: RemoteStateHandle -> Key -> Annex (Maybe Capability)
getCapability = getRemoteState
|
2014-01-08 20:14:37 +00:00
|
|
|
|
add RemoteStateHandle
This solves the problem of sameas remotes trampling over per-remote
state. Used for:
* per-remote state, of course
* per-remote metadata, also of course
* per-remote content identifiers, because two remote implementations
could in theory generate the same content identifier for two different
peices of content
While chunk logs are per-remote data, they don't use this, because the
number and size of chunks stored is a common property across sameas
remotes.
External special remote had a complication, where it was theoretically
possible for a remote to send SETSTATE or GETSTATE during INITREMOTE or
EXPORTSUPPORTED. Since the uuid of the remote is typically generate in
Remote.setup, it would only be possible to pass a Maybe
RemoteStateHandle into it, and it would otherwise have to construct its
own. Rather than go that route, I decided to send an ERROR in this case.
It seems unlikely that any existing external special remote will be
affected. They would have to make up a git-annex key, and set state for
some reason during INITREMOTE. I can imagine such a hack, but it doesn't
seem worth complicating the code in such an ugly way to support it.
Unfortunately, both TestRemote and Annex.Import needed the Remote
to have a new field added that holds its RemoteStateHandle.
2019-10-14 16:33:27 +00:00
|
|
|
-- | The whereis display for a key is its tahoe capability, when known.
getWhereisKey :: RemoteStateHandle -> Key -> Annex [String]
getWhereisKey rs k = maybe [] (: []) <$> getCapability rs k
|
|
|
|
|
2014-01-08 20:14:37 +00:00
|
|
|
{- tahoe put outputs a single line, containing the capability. -}
parsePut :: String -> Maybe Capability
parsePut output = case lines output of
	[l]
		| "URI" `isPrefixOf` l -> Just l
		| otherwise -> Nothing
	_ -> Nothing
|
|
|
|
|
|
|
|
{- tahoe check --raw outputs a json document.
 - Its contents will vary (for LIT capabilities, it lacks most info),
 - but should always contain a results object with a healthy value
 - that's true or false.
 -}
parseCheck :: String -> Either String Bool
parseCheck s = maybe parseerror (Right . healthy . results) (decode $ fromString s)
  where
	-- Empty output means the command itself did not run;
	-- non-empty output that fails to decode is a parse problem.
	parseerror
		| null s = Left "tahoe check failed to run"
		| otherwise = Left "unable to parse tahoe check output"
|
|
|
|
|
|
|
|
-- Minimal representation of the json output of tahoe check --raw;
-- only the results.healthy value is needed.
data CheckRet = CheckRet { results :: Results }
data Results = Results { healthy :: Bool }
|
|
|
|
|
|
|
|
-- Parses only the "results" object from the check output.
instance FromJSON CheckRet where
	parseJSON (Object v) = CheckRet
		<$> v .: "results"
	parseJSON _ = mzero

-- Parses only the "healthy" value from the results object.
instance FromJSON Results where
	parseJSON (Object v) = Results
		<$> v .: "healthy"
	parseJSON _ = mzero
|