2011-04-08 20:44:43 +00:00
|
|
|
{- Using bup as a remote.
|
|
|
|
-
|
2015-01-21 16:50:09 +00:00
|
|
|
- Copyright 2011-2014 Joey Hess <id@joeyh.name>
|
2011-04-08 20:44:43 +00:00
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
module Remote.Bup (remote) where
|
|
|
|
|
|
|
|
import qualified Data.Map as M
|
2014-08-03 05:12:24 +00:00
|
|
|
import qualified Data.ByteString.Lazy as L
|
Use cryptohash rather than SHA for hashing.
This is a massive win on OSX, which doesn't have a sha256sum normally.
Only use external hash commands when the file is > 1 mb,
since cryptohash is quite close to them in speed.
SHA is still used to calculate HMACs. I don't quite understand
cryptohash's API for those.
Used the following benchmark to arrive at the 1 mb number.
1 mb file:
benchmarking sha256/internal
mean: 13.86696 ms, lb 13.83010 ms, ub 13.93453 ms, ci 0.950
std dev: 249.3235 us, lb 162.0448 us, ub 458.1744 us, ci 0.950
found 5 outliers among 100 samples (5.0%)
4 (4.0%) high mild
1 (1.0%) high severe
variance introduced by outliers: 10.415%
variance is moderately inflated by outliers
benchmarking sha256/external
mean: 14.20670 ms, lb 14.17237 ms, ub 14.27004 ms, ci 0.950
std dev: 230.5448 us, lb 150.7310 us, ub 427.6068 us, ci 0.950
found 3 outliers among 100 samples (3.0%)
2 (2.0%) high mild
1 (1.0%) high severe
2 mb file:
benchmarking sha256/internal
mean: 26.44270 ms, lb 26.23701 ms, ub 26.63414 ms, ci 0.950
std dev: 1.012303 ms, lb 925.8921 us, ub 1.122267 ms, ci 0.950
variance introduced by outliers: 35.540%
variance is moderately inflated by outliers
benchmarking sha256/external
mean: 26.84521 ms, lb 26.77644 ms, ub 26.91433 ms, ci 0.950
std dev: 347.7867 us, lb 210.6283 us, ub 571.3351 us, ci 0.950
found 6 outliers among 100 samples (6.0%)
import Crypto.Hash
import Data.ByteString.Lazy as L
import Criterion.Main
import Common
testfile :: FilePath
testfile = "/run/shm/data" -- on ram disk
main = defaultMain
[ bgroup "sha256"
[ bench "internal" $ whnfIO internal
, bench "external" $ whnfIO external
]
]
sha256 :: L.ByteString -> Digest SHA256
sha256 = hashlazy
internal :: IO String
internal = show . sha256 <$> L.readFile testfile
external :: IO String
external = do
s <- readProcess "sha256sum" [testfile]
return $ fst $ separate (== ' ') s
2013-09-22 23:45:08 +00:00
|
|
|
import Data.ByteString.Lazy.UTF8 (fromString)
|
2011-04-08 20:44:43 +00:00
|
|
|
|
2011-10-05 20:02:51 +00:00
|
|
|
import Common.Annex
|
2014-05-16 20:08:20 +00:00
|
|
|
import qualified Annex
|
2011-06-02 01:56:04 +00:00
|
|
|
import Types.Remote
|
2012-08-08 20:06:01 +00:00
|
|
|
import Types.Key
|
2014-02-11 18:06:50 +00:00
|
|
|
import Types.Creds
|
2011-06-30 17:16:57 +00:00
|
|
|
import qualified Git
|
2011-12-14 19:56:11 +00:00
|
|
|
import qualified Git.Command
|
2011-12-13 19:05:07 +00:00
|
|
|
import qualified Git.Config
|
|
|
|
import qualified Git.Construct
|
2012-04-11 16:45:05 +00:00
|
|
|
import qualified Git.Ref
|
2011-04-08 20:44:43 +00:00
|
|
|
import Config
|
2013-03-13 20:16:01 +00:00
|
|
|
import Config.Cost
|
2013-09-24 17:37:41 +00:00
|
|
|
import qualified Remote.Helper.Ssh as Ssh
|
2011-08-17 00:49:54 +00:00
|
|
|
import Remote.Helper.Special
|
2013-09-24 17:37:41 +00:00
|
|
|
import Remote.Helper.Messages
|
Use cryptohash rather than SHA for hashing.
This is a massive win on OSX, which doesn't have a sha256sum normally.
Only use external hash commands when the file is > 1 mb,
since cryptohash is quite close to them in speed.
SHA is still used to calculate HMACs. I don't quite understand
cryptohash's API for those.
Used the following benchmark to arrive at the 1 mb number.
1 mb file:
benchmarking sha256/internal
mean: 13.86696 ms, lb 13.83010 ms, ub 13.93453 ms, ci 0.950
std dev: 249.3235 us, lb 162.0448 us, ub 458.1744 us, ci 0.950
found 5 outliers among 100 samples (5.0%)
4 (4.0%) high mild
1 (1.0%) high severe
variance introduced by outliers: 10.415%
variance is moderately inflated by outliers
benchmarking sha256/external
mean: 14.20670 ms, lb 14.17237 ms, ub 14.27004 ms, ci 0.950
std dev: 230.5448 us, lb 150.7310 us, ub 427.6068 us, ci 0.950
found 3 outliers among 100 samples (3.0%)
2 (2.0%) high mild
1 (1.0%) high severe
2 mb file:
benchmarking sha256/internal
mean: 26.44270 ms, lb 26.23701 ms, ub 26.63414 ms, ci 0.950
std dev: 1.012303 ms, lb 925.8921 us, ub 1.122267 ms, ci 0.950
variance introduced by outliers: 35.540%
variance is moderately inflated by outliers
benchmarking sha256/external
mean: 26.84521 ms, lb 26.77644 ms, ub 26.91433 ms, ci 0.950
std dev: 347.7867 us, lb 210.6283 us, ub 571.3351 us, ci 0.950
found 6 outliers among 100 samples (6.0%)
import Crypto.Hash
import Data.ByteString.Lazy as L
import Criterion.Main
import Common
testfile :: FilePath
testfile = "/run/shm/data" -- on ram disk
main = defaultMain
[ bgroup "sha256"
[ bench "internal" $ whnfIO internal
, bench "external" $ whnfIO external
]
]
sha256 :: L.ByteString -> Digest SHA256
sha256 = hashlazy
internal :: IO String
internal = show . sha256 <$> L.readFile testfile
external :: IO String
external = do
s <- readProcess "sha256sum" [testfile]
return $ fst $ separate (== ' ') s
2013-09-22 23:45:08 +00:00
|
|
|
import Utility.Hash
|
2012-10-25 22:17:32 +00:00
|
|
|
import Utility.UserInfo
|
2013-09-07 22:38:00 +00:00
|
|
|
import Annex.UUID
|
2013-03-28 21:03:04 +00:00
|
|
|
import Utility.Metered
|
2011-04-08 20:44:43 +00:00
|
|
|
|
2011-04-09 19:36:54 +00:00
|
|
|
type BupRepo = String
|
|
|
|
|
2011-12-31 08:11:39 +00:00
|
|
|
remote :: RemoteType
|
2011-04-08 20:44:43 +00:00
|
|
|
remote = RemoteType {
|
|
|
|
typename = "bup",
|
2011-04-09 16:41:17 +00:00
|
|
|
enumerate = findSpecialRemotes "buprepo",
|
2011-04-08 20:44:43 +00:00
|
|
|
generate = gen,
|
|
|
|
setup = bupSetup
|
|
|
|
}
|
|
|
|
|
2013-09-12 19:54:35 +00:00
|
|
|
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
|
2013-01-01 17:52:47 +00:00
|
|
|
gen r u c gc = do
|
2011-04-09 19:36:54 +00:00
|
|
|
bupr <- liftIO $ bup2GitRemote buprepo
|
2013-01-01 17:52:47 +00:00
|
|
|
cst <- remoteCost gc $
|
|
|
|
if bupLocal buprepo
|
2013-03-13 20:16:01 +00:00
|
|
|
then nearlyCheapRemoteCost
|
2013-01-01 17:52:47 +00:00
|
|
|
else expensiveRemoteCost
|
2011-04-09 19:36:54 +00:00
|
|
|
(u', bupr') <- getBupUUID bupr u
|
2011-04-09 01:37:59 +00:00
|
|
|
|
2014-08-02 22:36:26 +00:00
|
|
|
let this = Remote
|
2013-01-01 17:52:47 +00:00
|
|
|
{ uuid = u'
|
|
|
|
, cost = cst
|
|
|
|
, name = Git.repoDescribe r
|
2014-08-02 22:36:26 +00:00
|
|
|
, storeKey = storeKeyDummy
|
|
|
|
, retrieveKeyFile = retreiveKeyFileDummy
|
2013-01-01 17:52:47 +00:00
|
|
|
, retrieveKeyFileCheap = retrieveCheap buprepo
|
run Preparer to get Remover and CheckPresent actions
This will allow special remotes to eg, open a http connection and reuse it,
while checking if chunks are present, or removing chunks.
S3 and WebDAV both need this to support chunks with reasonable speed.
Note that a special remote might want to cache a http connection across
multiple requests. A simple case of this is that CheckPresent is typically
called before Store or Remove. A remote using this interface can certianly
use a Preparer that eg, uses a MVar to cache a http connection.
However, it's up to the remote to then deal with things like stale or
stalled http connections when eg, doing a series of downloads from a remote
and other places. There could be long delays between calls to a remote,
which could lead to eg, http connection stalls; the machine might even
move to a new network, etc.
It might be nice to improve this interface later to allow
the simple case without needing to handle the full complex case.
One way to do it would be to have a `Transaction SpecialRemote cache`,
where SpecialRemote contains methods for Storer, Retriever, Remover, and
CheckPresent, that all expect to be passed a `cache`.
2014-08-06 18:28:36 +00:00
|
|
|
, removeKey = removeKeyDummy
|
|
|
|
, checkPresent = checkPresentDummy
|
2014-08-06 17:45:19 +00:00
|
|
|
, checkPresentCheap = bupLocal buprepo
|
2013-01-01 17:52:47 +00:00
|
|
|
, whereisKey = Nothing
|
2013-10-11 20:03:18 +00:00
|
|
|
, remoteFsck = Nothing
|
2013-10-27 19:38:59 +00:00
|
|
|
, repairRepo = Nothing
|
2013-01-01 17:52:47 +00:00
|
|
|
, config = c
|
|
|
|
, repo = r
|
|
|
|
, gitconfig = gc
|
|
|
|
, localpath = if bupLocal buprepo && not (null buprepo)
|
|
|
|
then Just buprepo
|
|
|
|
else Nothing
|
|
|
|
, remotetype = remote
|
2014-01-13 18:41:10 +00:00
|
|
|
, availability = if bupLocal buprepo then LocallyAvailable else GloballyAvailable
|
2013-01-01 17:52:47 +00:00
|
|
|
, readonly = False
|
2014-08-10 18:52:58 +00:00
|
|
|
, mkUnavailable = return Nothing
|
2014-10-21 18:36:09 +00:00
|
|
|
, getInfo = return [("repo", buprepo)]
|
2014-12-08 17:40:15 +00:00
|
|
|
, claimUrl = Nothing
|
2014-12-11 19:32:42 +00:00
|
|
|
, checkUrl = Nothing
|
2013-01-01 17:52:47 +00:00
|
|
|
}
|
2014-08-03 19:35:23 +00:00
|
|
|
return $ Just $ specialRemote' specialcfg c
|
2014-08-02 22:36:26 +00:00
|
|
|
(simplyPrepare $ store this buprepo)
|
|
|
|
(simplyPrepare $ retrieve buprepo)
|
run Preparer to get Remover and CheckPresent actions
This will allow special remotes to eg, open a http connection and reuse it,
while checking if chunks are present, or removing chunks.
S3 and WebDAV both need this to support chunks with reasonable speed.
Note that a special remote might want to cache a http connection across
multiple requests. A simple case of this is that CheckPresent is typically
called before Store or Remove. A remote using this interface can certianly
use a Preparer that eg, uses a MVar to cache a http connection.
However, it's up to the remote to then deal with things like stale or
stalled http connections when eg, doing a series of downloads from a remote
and other places. There could be long delays between calls to a remote,
which could lead to eg, http connection stalls; the machine might even
move to a new network, etc.
It might be nice to improve this interface later to allow
the simple case without needing to handle the full complex case.
One way to do it would be to have a `Transaction SpecialRemote cache`,
where SpecialRemote contains methods for Storer, Retriever, Remover, and
CheckPresent, that all expect to be passed a `cache`.
2014-08-06 18:28:36 +00:00
|
|
|
(simplyPrepare $ remove buprepo)
|
|
|
|
(simplyPrepare $ checkKey r bupr')
|
2014-08-02 22:36:26 +00:00
|
|
|
this
|
2013-01-01 17:52:47 +00:00
|
|
|
where
|
|
|
|
buprepo = fromMaybe (error "missing buprepo") $ remoteAnnexBupRepo gc
|
2014-08-03 19:35:23 +00:00
|
|
|
specialcfg = (specialRemoteCfg c)
|
|
|
|
-- chunking would not improve bup
|
|
|
|
{ chunkConfig = NoChunks
|
|
|
|
}
|
2011-04-08 20:44:43 +00:00
|
|
|
|
2014-02-11 18:06:50 +00:00
|
|
|
bupSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID)
|
|
|
|
bupSetup mu _ c = do
|
2013-09-07 22:38:00 +00:00
|
|
|
u <- maybe (liftIO genUUID) return mu
|
|
|
|
|
2011-04-08 20:44:43 +00:00
|
|
|
-- verify configuration is sane
|
2011-07-15 16:47:14 +00:00
|
|
|
let buprepo = fromMaybe (error "Specify buprepo=") $
|
2011-05-15 06:49:43 +00:00
|
|
|
M.lookup "buprepo" c
|
glacier, S3: Fix bug that caused embedded creds to not be encypted using the remote's key.
encryptionSetup must be called before setRemoteCredPair. Otherwise,
the RemoteConfig doesn't have the cipher in it, and so no cipher is used to
encrypt the embedded creds.
This is a security fix for non-shared encryption methods!
For encryption=shared, there's no security problem, just an
inconsistentency in whether the embedded creds are encrypted.
This is very important to get right, so used some types to help ensure that
setRemoteCredPair is only run after encryptionSetup. Note that the external
special remote bypasses the type safety, since creds can be set after the
initial remote config, if the external special remote program requests it.
Also note that IA remotes never use encryption, so encryptionSetup is not
run for them at all, and again the type safety is bypassed.
This leaves two open questions:
1. What to do about S3 and glacier remotes that were set up
using encryption=pubkey/hybrid with embedcreds?
Such a git repo has a security hole embedded in it, and this needs to be
communicated to the user. Is the changelog enough?
2. enableremote won't work in such a repo, because git-annex will
try to decrypt the embedded creds, which are not encrypted, so fails.
This needs to be dealt with, especially for ecryption=shared repos,
which are not really broken, just inconsistently configured.
Noticing that problem for encryption=shared is what led to commit
fbdeeeed5fa276d94be587c8916d725eddcaf546, which tried to
fix the problem by not decrypting the embedded creds.
This commit was sponsored by Josh Taylor.
2014-09-18 21:07:17 +00:00
|
|
|
(c', _encsetup) <- encryptionSetup c
|
2011-04-08 20:44:43 +00:00
|
|
|
|
|
|
|
-- bup init will create the repository.
|
|
|
|
-- (If the repository already exists, bup init again appears safe.)
|
2011-07-19 18:07:23 +00:00
|
|
|
showAction "bup init"
|
2012-01-24 19:28:13 +00:00
|
|
|
unlessM (bup "init" buprepo []) $ error "bup init failed"
|
2011-04-08 20:44:43 +00:00
|
|
|
|
2011-04-09 16:41:17 +00:00
|
|
|
storeBupUUID u buprepo
|
2011-04-09 16:34:49 +00:00
|
|
|
|
2011-04-09 16:41:17 +00:00
|
|
|
-- The buprepo is stored in git config, as well as this repo's
|
2011-04-08 20:44:43 +00:00
|
|
|
-- persistant state, so it can vary between hosts.
|
2011-04-16 17:25:27 +00:00
|
|
|
gitConfigSpecialRemote u c' "buprepo" buprepo
|
2011-04-08 20:44:43 +00:00
|
|
|
|
2013-09-07 22:38:00 +00:00
|
|
|
return (c', u)
|
2011-04-08 20:44:43 +00:00
|
|
|
|
2011-04-09 19:36:54 +00:00
|
|
|
bupParams :: String -> BupRepo -> [CommandParam] -> [CommandParam]
|
2011-04-09 16:41:17 +00:00
|
|
|
bupParams command buprepo params =
|
2011-07-15 16:47:14 +00:00
|
|
|
Param command : [Param "-r", Param buprepo] ++ params
|
2011-04-08 20:44:43 +00:00
|
|
|
|
2011-04-09 19:36:54 +00:00
|
|
|
bup :: String -> BupRepo -> [CommandParam] -> Annex Bool
|
2011-04-09 16:41:17 +00:00
|
|
|
bup command buprepo params = do
|
2011-07-19 18:07:23 +00:00
|
|
|
showOutput -- make way for bup output
|
2012-01-28 19:23:28 +00:00
|
|
|
liftIO $ boolSystem "bup" $ bupParams command buprepo params
|
2011-04-08 20:44:43 +00:00
|
|
|
|
2013-01-01 17:52:47 +00:00
|
|
|
bupSplitParams :: Remote -> BupRepo -> Key -> [CommandParam] -> Annex [CommandParam]
|
2011-04-17 03:01:29 +00:00
|
|
|
bupSplitParams r buprepo k src = do
|
2013-01-01 17:52:47 +00:00
|
|
|
let os = map Param $ remoteAnnexBupSplitOptions $ gitconfig r
|
2011-07-19 18:07:23 +00:00
|
|
|
showOutput -- make way for bup output
|
2011-04-17 03:01:29 +00:00
|
|
|
return $ bupParams "split" buprepo
|
2014-08-02 22:36:26 +00:00
|
|
|
(os ++ [Param "-q", Param "-n", Param (bupRef k)] ++ src)
|
|
|
|
|
|
|
|
store :: Remote -> BupRepo -> Storer
|
|
|
|
store r buprepo = byteStorer $ \k b p -> do
|
|
|
|
params <- bupSplitParams r buprepo k []
|
|
|
|
let cmd = proc "bup" (toCommand params)
|
|
|
|
liftIO $ withHandle StdinHandle createProcessSuccess cmd $ \h -> do
|
|
|
|
meteredWrite p h b
|
|
|
|
return True
|
|
|
|
|
|
|
|
retrieve :: BupRepo -> Retriever
|
2014-08-03 05:12:24 +00:00
|
|
|
retrieve buprepo = byteRetriever $ \k sink -> do
|
|
|
|
let params = bupParams "join" buprepo [Param $ bupRef k]
|
|
|
|
let p = proc "bup" (toCommand params)
|
|
|
|
(_, Just h, _, pid) <- liftIO $ createProcess $ p { std_out = CreatePipe }
|
|
|
|
liftIO (hClose h >> forceSuccessProcess p pid)
|
|
|
|
`after` (sink =<< liftIO (L.hGetContents h))
|
2011-04-08 20:44:43 +00:00
|
|
|
|
2012-01-20 17:23:11 +00:00
|
|
|
retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool
|
|
|
|
retrieveCheap _ _ _ = return False
|
|
|
|
|
2013-01-09 22:42:29 +00:00
|
|
|
{- Cannot revert having stored a key in bup, but at least the data for the
|
|
|
|
- key will be used for deltaing data of other keys stored later.
|
|
|
|
-
|
|
|
|
- We can, however, remove the git branch that bup created for the key.
|
|
|
|
-}
|
run Preparer to get Remover and CheckPresent actions
This will allow special remotes to eg, open a http connection and reuse it,
while checking if chunks are present, or removing chunks.
S3 and WebDAV both need this to support chunks with reasonable speed.
Note that a special remote might want to cache a http connection across
multiple requests. A simple case of this is that CheckPresent is typically
called before Store or Remove. A remote using this interface can certianly
use a Preparer that eg, uses a MVar to cache a http connection.
However, it's up to the remote to then deal with things like stale or
stalled http connections when eg, doing a series of downloads from a remote
and other places. There could be long delays between calls to a remote,
which could lead to eg, http connection stalls; the machine might even
move to a new network, etc.
It might be nice to improve this interface later to allow
the simple case without needing to handle the full complex case.
One way to do it would be to have a `Transaction SpecialRemote cache`,
where SpecialRemote contains methods for Storer, Retriever, Remover, and
CheckPresent, that all expect to be passed a `cache`.
2014-08-06 18:28:36 +00:00
|
|
|
remove :: BupRepo -> Remover
|
2014-08-02 22:36:26 +00:00
|
|
|
remove buprepo k = do
|
|
|
|
go =<< liftIO (bup2GitRemote buprepo)
|
|
|
|
warning "content cannot be completely removed from bup remote"
|
|
|
|
return True
|
2013-01-09 22:42:29 +00:00
|
|
|
where
|
|
|
|
go r
|
|
|
|
| Git.repoIsUrl r = void $ onBupRemote r boolSystem "git" params
|
2014-08-02 22:36:26 +00:00
|
|
|
| otherwise = void $ liftIO $ catchMaybeIO $ do
|
|
|
|
r' <- Git.Config.read r
|
|
|
|
boolSystem "git" $ Git.Command.gitCommandLine params r'
|
|
|
|
params = [ Params "branch -q -D", Param (bupRef k) ]
|
2013-01-09 22:42:29 +00:00
|
|
|
|
2011-04-08 20:44:43 +00:00
|
|
|
{- Bup does not provide a way to tell if a given dataset is present
|
|
|
|
- in a bup repository. One way it to check if the git repository has
|
|
|
|
- a branch matching the name (as created by bup split -n).
|
|
|
|
-}
|
run Preparer to get Remover and CheckPresent actions
This will allow special remotes to eg, open a http connection and reuse it,
while checking if chunks are present, or removing chunks.
S3 and WebDAV both need this to support chunks with reasonable speed.
Note that a special remote might want to cache a http connection across
multiple requests. A simple case of this is that CheckPresent is typically
called before Store or Remove. A remote using this interface can certianly
use a Preparer that eg, uses a MVar to cache a http connection.
However, it's up to the remote to then deal with things like stale or
stalled http connections when eg, doing a series of downloads from a remote
and other places. There could be long delays between calls to a remote,
which could lead to eg, http connection stalls; the machine might even
move to a new network, etc.
It might be nice to improve this interface later to allow
the simple case without needing to handle the full complex case.
One way to do it would be to have a `Transaction SpecialRemote cache`,
where SpecialRemote contains methods for Storer, Retriever, Remover, and
CheckPresent, that all expect to be passed a `cache`.
2014-08-06 18:28:36 +00:00
|
|
|
checkKey :: Git.Repo -> Git.Repo -> CheckPresent
|
2014-08-06 17:45:19 +00:00
|
|
|
checkKey r bupr k
|
2011-04-09 19:36:54 +00:00
|
|
|
| Git.repoIsUrl bupr = do
|
2013-09-24 17:37:41 +00:00
|
|
|
showChecking r
|
2014-08-06 17:45:19 +00:00
|
|
|
onBupRemote bupr boolSystem "git" params
|
|
|
|
| otherwise = liftIO $ boolSystem "git" $
|
|
|
|
Git.Command.gitCommandLine params bupr
|
2012-11-11 04:51:07 +00:00
|
|
|
where
|
|
|
|
params =
|
|
|
|
[ Params "show-ref --quiet --verify"
|
|
|
|
, Param $ "refs/heads/" ++ bupRef k
|
|
|
|
]
|
2011-04-09 16:34:49 +00:00
|
|
|
|
|
|
|
{- Store UUID in the annex.uuid setting of the bup repository. -}
|
2011-04-09 19:36:54 +00:00
|
|
|
storeBupUUID :: UUID -> BupRepo -> Annex ()
|
2011-04-09 16:41:17 +00:00
|
|
|
storeBupUUID u buprepo = do
|
|
|
|
r <- liftIO $ bup2GitRemote buprepo
|
2011-04-09 16:34:49 +00:00
|
|
|
if Git.repoIsUrl r
|
|
|
|
then do
|
2011-07-19 18:07:23 +00:00
|
|
|
showAction "storing uuid"
|
2012-01-24 19:28:13 +00:00
|
|
|
unlessM (onBupRemote r boolSystem "git"
|
|
|
|
[Params $ "config annex.uuid " ++ v]) $
|
|
|
|
error "ssh failed"
|
2011-04-09 16:34:49 +00:00
|
|
|
else liftIO $ do
|
2011-12-13 19:05:07 +00:00
|
|
|
r' <- Git.Config.read r
|
|
|
|
let olduuid = Git.Config.get "annex.uuid" "" r'
|
2011-11-08 19:34:10 +00:00
|
|
|
when (olduuid == "") $
|
2013-03-03 17:39:07 +00:00
|
|
|
Git.Command.run
|
|
|
|
[ Param "config"
|
|
|
|
, Param "annex.uuid"
|
|
|
|
, Param v
|
|
|
|
] r'
|
2012-11-11 04:51:07 +00:00
|
|
|
where
|
|
|
|
v = fromUUID u
|
2011-04-09 16:34:49 +00:00
|
|
|
|
2011-04-09 19:36:54 +00:00
|
|
|
onBupRemote :: Git.Repo -> (FilePath -> [CommandParam] -> IO a) -> FilePath -> [CommandParam] -> Annex a
|
|
|
|
onBupRemote r a command params = do
|
2014-05-16 20:08:20 +00:00
|
|
|
c <- Annex.getRemoteGitConfig r
|
2014-05-16 11:34:43 +00:00
|
|
|
sshparams <- Ssh.toRepo r c [Param $
|
2011-07-15 16:47:14 +00:00
|
|
|
"cd " ++ dir ++ " && " ++ unwords (command : toCommand params)]
|
2011-04-09 19:36:54 +00:00
|
|
|
liftIO $ a "ssh" sshparams
|
2013-06-21 08:28:43 +00:00
|
|
|
where
|
|
|
|
path = Git.repoPath r
|
|
|
|
base = fromMaybe path (stripPrefix "/~/" path)
|
|
|
|
dir = shellEscape base
|
2011-04-09 19:36:54 +00:00
|
|
|
|
2011-04-09 16:41:17 +00:00
|
|
|
{- Allow for bup repositories on removable media by checking
|
2011-04-09 16:59:18 +00:00
|
|
|
- local bup repositories to see if they are available, and getting their
|
|
|
|
- uuid (which may be different from the stored uuid for the bup remote).
|
|
|
|
-
|
2011-12-10 22:51:01 +00:00
|
|
|
- If a bup repository is not available, returns NoUUID.
|
2011-04-09 16:59:18 +00:00
|
|
|
- This will cause checkPresent to indicate nothing from the bup remote
|
|
|
|
- is known to be present.
|
2011-04-09 19:36:54 +00:00
|
|
|
-
|
|
|
|
- Also, returns a version of the repo with config read, if it is local.
|
2011-04-09 16:59:18 +00:00
|
|
|
-}
|
2011-04-09 19:36:54 +00:00
|
|
|
getBupUUID :: Git.Repo -> UUID -> Annex (UUID, Git.Repo)
|
|
|
|
getBupUUID r u
|
|
|
|
| Git.repoIsUrl r = return (u, r)
|
|
|
|
| otherwise = liftIO $ do
|
2012-02-03 20:47:24 +00:00
|
|
|
ret <- tryIO $ Git.Config.read r
|
2011-04-09 19:36:54 +00:00
|
|
|
case ret of
|
2011-12-13 19:05:07 +00:00
|
|
|
Right r' -> return (toUUID $ Git.Config.get "annex.uuid" "" r', r')
|
2011-11-07 18:46:01 +00:00
|
|
|
Left _ -> return (NoUUID, r)
|
2011-04-09 16:41:17 +00:00
|
|
|
|
2011-04-09 16:34:49 +00:00
|
|
|
{- Converts a bup remote path spec into a Git.Repo. There are some
|
|
|
|
- differences in path representation between git and bup. -}
|
2011-04-09 19:36:54 +00:00
|
|
|
bup2GitRemote :: BupRepo -> IO Git.Repo
|
2011-04-09 16:34:49 +00:00
|
|
|
bup2GitRemote "" = do
|
|
|
|
-- bup -r "" operates on ~/.bup
|
|
|
|
h <- myHomeDir
|
2011-12-13 19:05:07 +00:00
|
|
|
Git.Construct.fromAbsPath $ h </> ".bup"
|
2011-04-09 16:34:49 +00:00
|
|
|
bup2GitRemote r
|
|
|
|
| bupLocal r =
|
2011-12-15 22:11:42 +00:00
|
|
|
if "/" `isPrefixOf` r
|
2011-12-13 19:05:07 +00:00
|
|
|
then Git.Construct.fromAbsPath r
|
2011-04-09 16:34:49 +00:00
|
|
|
else error "please specify an absolute path"
|
2011-12-13 19:05:07 +00:00
|
|
|
| otherwise = Git.Construct.fromUrl $ "ssh://" ++ host ++ slash dir
|
2012-11-11 04:51:07 +00:00
|
|
|
where
|
|
|
|
bits = split ":" r
|
|
|
|
host = Prelude.head bits
|
2013-04-23 00:24:53 +00:00
|
|
|
dir = intercalate ":" $ drop 1 bits
|
2012-11-11 04:51:07 +00:00
|
|
|
-- "host:~user/dir" is not supported specially by bup;
|
|
|
|
-- "host:dir" is relative to the home directory;
|
|
|
|
-- "host:" goes in ~/.bup
|
|
|
|
slash d
|
|
|
|
| null d = "/~/.bup"
|
|
|
|
| "/" `isPrefixOf` d = d
|
|
|
|
| otherwise = "/~/" ++ d
|
2011-04-09 16:34:49 +00:00
|
|
|
|
2012-04-11 16:45:05 +00:00
|
|
|
{- Converts a key into a git ref name, which bup-split -n will use to point
|
|
|
|
- to it. -}
|
|
|
|
bupRef :: Key -> String
|
|
|
|
bupRef k
|
|
|
|
| Git.Ref.legal True shown = shown
|
Use cryptohash rather than SHA for hashing.
This is a massive win on OSX, which doesn't have a sha256sum normally.
Only use external hash commands when the file is > 1 mb,
since cryptohash is quite close to them in speed.
SHA is still used to calculate HMACs. I don't quite understand
cryptohash's API for those.
Used the following benchmark to arrive at the 1 mb number.
1 mb file:
benchmarking sha256/internal
mean: 13.86696 ms, lb 13.83010 ms, ub 13.93453 ms, ci 0.950
std dev: 249.3235 us, lb 162.0448 us, ub 458.1744 us, ci 0.950
found 5 outliers among 100 samples (5.0%)
4 (4.0%) high mild
1 (1.0%) high severe
variance introduced by outliers: 10.415%
variance is moderately inflated by outliers
benchmarking sha256/external
mean: 14.20670 ms, lb 14.17237 ms, ub 14.27004 ms, ci 0.950
std dev: 230.5448 us, lb 150.7310 us, ub 427.6068 us, ci 0.950
found 3 outliers among 100 samples (3.0%)
2 (2.0%) high mild
1 (1.0%) high severe
2 mb file:
benchmarking sha256/internal
mean: 26.44270 ms, lb 26.23701 ms, ub 26.63414 ms, ci 0.950
std dev: 1.012303 ms, lb 925.8921 us, ub 1.122267 ms, ci 0.950
variance introduced by outliers: 35.540%
variance is moderately inflated by outliers
benchmarking sha256/external
mean: 26.84521 ms, lb 26.77644 ms, ub 26.91433 ms, ci 0.950
std dev: 347.7867 us, lb 210.6283 us, ub 571.3351 us, ci 0.950
found 6 outliers among 100 samples (6.0%)
import Crypto.Hash
import Data.ByteString.Lazy as L
import Criterion.Main
import Common
testfile :: FilePath
testfile = "/run/shm/data" -- on ram disk
main = defaultMain
[ bgroup "sha256"
[ bench "internal" $ whnfIO internal
, bench "external" $ whnfIO external
]
]
sha256 :: L.ByteString -> Digest SHA256
sha256 = hashlazy
internal :: IO String
internal = show . sha256 <$> L.readFile testfile
external :: IO String
external = do
s <- readProcess "sha256sum" [testfile]
return $ fst $ separate (== ' ') s
2013-09-22 23:45:08 +00:00
|
|
|
| otherwise = "git-annex-" ++ show (sha256 (fromString shown))
|
2012-11-11 04:51:07 +00:00
|
|
|
where
|
|
|
|
shown = key2file k
|
2012-04-11 16:45:05 +00:00
|
|
|
|
2011-04-09 19:36:54 +00:00
|
|
|
bupLocal :: BupRepo -> Bool
|
2011-04-09 16:34:49 +00:00
|
|
|
bupLocal = notElem ':'
|