Added tahoe special remote.

Known problems:

1. Tries to tahoe start when daemon is already running.

2. If multiple tahoe remotes are set up on the same computer,
   they will have the same node.url configured by default,
   and this confuses tahoe commands.

This commit was sponsored by LeastAuthority.com
This commit is contained in:
Joey Hess 2014-01-08 16:14:37 -04:00
parent 5740489927
commit 85272d8a98
8 changed files with 299 additions and 1 deletions

View file

@ -34,6 +34,9 @@ import qualified Remote.Web
#ifdef WITH_WEBDAV
import qualified Remote.WebDAV
#endif
#ifdef WITH_TAHOE
import qualified Remote.Tahoe
#endif
import qualified Remote.Glacier
import qualified Remote.Hook
import qualified Remote.External
@ -51,6 +54,9 @@ remoteTypes =
, Remote.Web.remote
#ifdef WITH_WEBDAV
, Remote.WebDAV.remote
#endif
#ifdef WITH_TAHOE
, Remote.Tahoe.remote
#endif
, Remote.Glacier.remote
, Remote.Hook.remote

229
Remote/Tahoe.hs Normal file
View file

@ -0,0 +1,229 @@
{- Tahoe-LAFS special remotes.
-
- Tahoe capabilities for accessing objects stored in the remote
- are preserved in the remote state log.
-
- In order to allow multiple clones of a repository to access the same
- tahoe repository, git-annex needs to store the introducer furl,
- and the shared-convergence-secret. These are stored in the remote
- configuration, when embedcreds is enabled.
-
- Using those creds, git-annex sets up a tahoe configuration directory in
- ~/.tahoe/git-annex/UUID/
-
- Tahoe has its own encryption, so git-annex's encryption is not used.
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Remote.Tahoe (remote) where
import qualified Data.Map as M
import Data.Aeson
import Data.ByteString.Lazy.UTF8 (fromString)
import Common.Annex
import Types.Remote
import qualified Git
import Config
import Config.Cost
import Remote.Helper.Special
import Annex.UUID
import Annex.Content
import Logs.RemoteState
import Utility.UserInfo
import Utility.Metered
import Utility.Env
type TahoeConfigDir = FilePath
type SharedConvergenceSecret = String
type IntroducerFurl = String
type Capability = String
remote :: RemoteType
remote = RemoteType {
typename = "tahoe",
enumerate = findSpecialRemotes "tahoe",
generate = gen,
setup = tahoeSetup
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
cst <- remoteCost gc expensiveRemoteCost
configdir <- liftIO $ maybe (defaultTahoeConfigDir u) return (remoteAnnexTahoe gc)
return $ Just $ Remote {
uuid = u,
cost = cst,
name = Git.repoDescribe r,
storeKey = store u configdir,
retrieveKeyFile = retrieve u configdir,
retrieveKeyFileCheap = \_ _ -> return False,
removeKey = remove,
hasKey = checkPresent u configdir,
hasKeyCheap = False,
whereisKey = Nothing,
remoteFsck = Nothing,
repairRepo = Nothing,
config = c,
repo = r,
gitconfig = gc,
localpath = Nothing,
readonly = False,
globallyAvailable = True,
remotetype = remote
}
tahoeSetup :: Maybe UUID -> RemoteConfig -> Annex (RemoteConfig, UUID)
tahoeSetup mu c = do
furl <- fromMaybe (fromMaybe missingfurl $ M.lookup furlk c)
<$> liftIO (getEnv "TAHOE_FURL")
u <- maybe (liftIO genUUID) return mu
configdir <- liftIO $ defaultTahoeConfigDir u
scs <- liftIO $ tahoeConfigure configdir furl (M.lookup scsk c)
let c' = if M.lookup "embedcreds" c == Just "yes"
then flip M.union c $ M.fromList
[ (furlk, furl)
, (scsk, scs)
]
else c
gitConfigSpecialRemote u c' "tahoe" configdir
return (c', u)
where
scsk = "shared-convergence-secret"
furlk = "introducer-furl"
missingfurl = error "Set TAHOE_FURL to the introducer furl to use."
store :: UUID -> TahoeConfigDir -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
store u configdir k _f _p = sendAnnex k noop $ \src -> do
liftIO $ startTahoeDaemon configdir
parsePut <$> liftIO (readTahoe configdir "put" [File src]) >>= maybe
(return False)
(\cap -> storeCapability u k cap >> return True)
retrieve :: UUID -> TahoeConfigDir -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
retrieve u configdir k _f d _p = go =<< getCapability u k
where
go Nothing = return False
go (Just cap) = liftIO $ do
startTahoeDaemon configdir
boolTahoe configdir "get" [Param cap, File d]
remove :: Key -> Annex Bool
remove _k = do
warning "content cannot be removed from tahoe remote"
return False
checkPresent :: UUID -> TahoeConfigDir -> Key -> Annex (Either String Bool)
checkPresent u configdir k = go =<< getCapability u k
where
go Nothing = return (Right False)
go (Just cap) = liftIO $ do
startTahoeDaemon configdir
parseCheck <$> readTahoe configdir "check"
[ Param "--raw"
, Param cap
]
defaultTahoeConfigDir :: UUID -> IO TahoeConfigDir
defaultTahoeConfigDir u = do
h <- myHomeDir
return $ h </> ".tahoe" </> "git-annex" </> fromUUID u
tahoeConfigure :: TahoeConfigDir -> IntroducerFurl -> Maybe SharedConvergenceSecret -> IO SharedConvergenceSecret
tahoeConfigure configdir furl mscs = do
unlessM (createClient configdir furl) $
error "tahoe create-client failed"
maybe noop (writeSharedConvergenceSecret configdir) mscs
startTahoeDaemon configdir
getSharedConvergenceSecret configdir
createClient :: TahoeConfigDir -> IntroducerFurl -> IO Bool
createClient configdir furl = do
createDirectoryIfMissing True (parentDir configdir)
boolTahoe configdir "create-client"
[ Param "--nickname", Param "git-annex"
, Param "--introducer", Param furl
]
writeSharedConvergenceSecret :: TahoeConfigDir -> SharedConvergenceSecret -> IO ()
writeSharedConvergenceSecret configdir scs =
writeFile (convergenceFile configdir) (unlines [scs])
{- The tahoe daemon writes the convergenceFile shortly after it starts
- (it does not need to connect to the network). So, try repeatedly to read
- the file, for up to 1 minute. To avoid reading a partially written
- file, look for the newline after the value. -}
getSharedConvergenceSecret :: TahoeConfigDir -> IO SharedConvergenceSecret
getSharedConvergenceSecret configdir = go (60 :: Int)
where
f = convergenceFile configdir
go n
| n == 0 = error $ "tahoe did not write " ++ f ++ " after 1 minute. Perhaps the daemon failed to start?"
| otherwise = do
v <- catchMaybeIO (readFile f)
case v of
Just s | "\n" `isSuffixOf` s || "\r" `isSuffixOf` s ->
return $ takeWhile (`notElem` "\n\r") s
_ -> go (n - 1)
convergenceFile :: TahoeConfigDir -> FilePath
convergenceFile configdir = configdir </> "private" </> "convergence"
{- XXX Avoid starting tahoe if it is already running. -}
startTahoeDaemon :: TahoeConfigDir -> IO ()
startTahoeDaemon configdir = void $ boolTahoe configdir "start" []
boolTahoe :: TahoeConfigDir -> String -> [CommandParam] -> IO Bool
boolTahoe configdir command params = boolSystem "tahoe" $
tahoeParams configdir command params
readTahoe :: TahoeConfigDir -> String -> [CommandParam] -> IO String
readTahoe configdir command params = catchDefaultIO "" $
readProcess "tahoe" $ toCommand $
tahoeParams configdir command params
tahoeParams :: TahoeConfigDir -> String -> [CommandParam] -> [CommandParam]
tahoeParams configdir command params =
Param command : Param "-d" : File configdir : params
storeCapability :: UUID -> Key -> Capability -> Annex ()
storeCapability u k cap = setRemoteState u k cap
getCapability :: UUID -> Key -> Annex (Maybe Capability)
getCapability u k = getRemoteState u k
{- tahoe put outputs a single line, containing the capability. -}
parsePut :: String -> Maybe Capability
parsePut s = case lines s of
[cap] | "URI" `isPrefixOf` cap -> Just cap
_ -> Nothing
{- tahoe check --raw outputs a json document.
- Its contents will vary (for LIT capabilities, it lacks most info),
- but should always contain a results object with a healthy value
- that's true or false.
-}
parseCheck :: String -> Either String Bool
parseCheck s = maybe parseerror (Right . healthy . results) (decode $ fromString s)
where
parseerror
| null s = Left "tahoe check failed to run"
| otherwise = Left "unable to parse tahoe check output"
data CheckRet = CheckRet { results :: Results }
data Results = Results { healthy :: Bool }
instance FromJSON CheckRet where
parseJSON (Object v) = CheckRet
<$> v .: "results"
parseJSON _ = mzero
instance FromJSON Results where
parseJSON (Object v) = Results
<$> v .: "healthy"
parseJSON _ = mzero

View file

@ -110,6 +110,7 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexGnupgOptions :: [String]
, remoteAnnexRsyncUrl :: Maybe String
, remoteAnnexBupRepo :: Maybe String
, remoteAnnexTahoe :: Maybe FilePath
, remoteAnnexBupSplitOptions :: [String]
, remoteAnnexDirectory :: Maybe FilePath
, remoteAnnexGCrypt :: Maybe String
@ -136,6 +137,7 @@ extractRemoteGitConfig r remotename = RemoteGitConfig
, remoteAnnexGnupgOptions = getoptions "gnupg-options"
, remoteAnnexRsyncUrl = notempty $ getmaybe "rsyncurl"
, remoteAnnexBupRepo = getmaybe "buprepo"
, remoteAnnexTahoe = getmaybe "tahoe"
, remoteAnnexBupSplitOptions = getoptions "bup-split-options"
, remoteAnnexDirectory = notempty $ getmaybe "directory"
, remoteAnnexGCrypt = notempty $ getmaybe "gcrypt"

6
debian/changelog vendored
View file

@ -1,3 +1,9 @@
git-annex (5.20140108) UNRELEASED; urgency=medium
* Added tahoe special remote.
-- Joey Hess <joeyh@debian.org> Wed, 08 Jan 2014 13:13:54 -0400
git-annex (5.20140107) unstable; urgency=medium
* mirror: Support --all (and --unused).

View file

@ -1401,6 +1401,11 @@ Here are all the supported configuration settings.
Used to identify webdav special remotes.
Normally this is automatically set up by `git annex initremote`.
* `remote.<name>.tahoe`
Used to identify tahoe special remotes.
Points to the configuration directory for tahoe.
* `remote.<name>.annex-xmppaddress`
Used to identify the XMPP address of a Jabber buddy.

View file

@ -14,6 +14,7 @@ They cannot be used by other git commands though.
* [[directory]]
* [[rsync]]
* [[webdav]]
* [[tahoe]]
* [[web]]
* [[xmpp]]
* [[hook]]
@ -27,7 +28,6 @@ for using git-annex with various services:
* [[Amazon_S3|tips/using_Amazon_S3]]
* [[Amazon_Glacier|tips/using_Amazon_Glacier]]
* [[tips/Internet_Archive_via_S3]]
* [[tahoe-lafs|forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs]]
* [[Box.com|tips/using_box.com_as_a_special_remote]]
* [[Google drive|tips/googledriveannex]]
* [[Google Cloud Storage|tips/using_Google_Cloud_Storage]]

View file

@ -0,0 +1,43 @@
This special remote stores file contents using
[Tahoe-LAFS](http://tahoe-lafs.org/). There are a number of commercial
providers, or you can build your own tahoe storage grid.
Since Tahoe-LAFS encrypts all data stored in it, git-annex does not do any
additional encryption of its own.
Note that data stored in a tahoe remote cannot be dropped from it, as
Tahoe-LAFS does not support removing data once it is stored in the Tahoe grid.
This, along with Tahoe's ability to recover data when some nodes fail,
makes a tahoe special remote an excellent choice for storing backups.
Typically you will have an account on a Tahoe-LAFS storage grid, which
is represented by an "introducer furl". You need to supply this to
git-annex in the `TAHOE_FURL` environment variable when initializing the
remote. git-annex will then generate a tahoe configuration directory for
the remote under `~/.tahoe/git-annex/`, and automatically start the tahoe
daemon as needed.
## configuration
These parameters can be passed to `git annex initremote` to configure
the tahoe remote.
* `embedcreds` - Optional. Set to "yes" embed the tahoe credentials
(specifically the introducer furl and shared-convergence-secret)
inside the git repository, which allows other clones to also use them
in order to access the tahoe grid.
Think carefully about who can access your git repository, and
whether you want to give them access to your tahoe system before
using embedcreds!
Setup example:
# TAHOE_FURL=... git annex initremote tahoe type=tahoe embedcreds=yes
----
An older implementation of tahoe for git-annex used
the hook special remote. It is not compatible with this newer
implementation. See
[[tahoe-lafs|forum/tips:_special__95__remotes__47__hook_with_tahoe-lafs]].

View file

@ -75,6 +75,9 @@ Flag Feed
Flag Quvi
Description: Enable use of quvi to download videos
Flag Tahoe
Description: Enable the tahoe special remote
Flag CryptoHash
Description: Enable use of cryptohash for checksumming
@ -192,6 +195,10 @@ Executable git-annex
if flag(Quvi)
Build-Depends: aeson
CPP-Options: -DWITH_QUVI
if flag(Tahoe)
Build-Depends: aeson
CPP-Options: -DWITH_TAHOE
if flag(EKG)
Build-Depends: ekg