ddar special remote
This commit is contained in:
parent
f876850013
commit
4184566627
7 changed files with 312 additions and 0 deletions
229
Remote/Ddar.hs
Normal file
229
Remote/Ddar.hs
Normal file
|
@ -0,0 +1,229 @@
|
||||||
|
{- Using ddar as a remote. Based on bup and rsync remotes.
|
||||||
|
-
|
||||||
|
- Copyright 2011 Joey Hess <joey@kitenet.net>
|
||||||
|
- Copyright 2014 Robie Basak <robie@justgohome.co.uk>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Remote.Ddar (remote) where
|
||||||
|
|
||||||
|
import Control.Exception
|
||||||
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
import qualified Data.Map as M
|
||||||
|
import System.IO.Error
|
||||||
|
import System.Process
|
||||||
|
|
||||||
|
import Data.String.Utils
|
||||||
|
import Common.Annex
|
||||||
|
import Types.Remote
|
||||||
|
import Types.Key
|
||||||
|
import Types.Creds
|
||||||
|
import qualified Git
|
||||||
|
import Config
|
||||||
|
import Config.Cost
|
||||||
|
import Remote.Helper.Special
|
||||||
|
import Remote.Helper.Encryptable
|
||||||
|
import Crypto
|
||||||
|
import Annex.Content
|
||||||
|
import Annex.Ssh
|
||||||
|
import Annex.UUID
|
||||||
|
import Utility.Metered
|
||||||
|
|
||||||
|
type DdarRepo = String
|
||||||
|
|
||||||
|
remote :: RemoteType
|
||||||
|
remote = RemoteType {
|
||||||
|
typename = "ddar",
|
||||||
|
enumerate = findSpecialRemotes "ddarrepo",
|
||||||
|
generate = gen,
|
||||||
|
setup = ddarSetup
|
||||||
|
}
|
||||||
|
|
||||||
|
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
|
||||||
|
gen r u c gc = do
|
||||||
|
cst <- remoteCost gc $
|
||||||
|
if ddarLocal ddarrepo
|
||||||
|
then nearlyCheapRemoteCost
|
||||||
|
else expensiveRemoteCost
|
||||||
|
|
||||||
|
let new = Remote
|
||||||
|
{ uuid = u
|
||||||
|
, cost = cst
|
||||||
|
, name = Git.repoDescribe r
|
||||||
|
, storeKey = store ddarrepo
|
||||||
|
, retrieveKeyFile = retrieve ddarrepo
|
||||||
|
, retrieveKeyFileCheap = retrieveCheap
|
||||||
|
, removeKey = remove ddarrepo
|
||||||
|
, hasKey = checkPresent ddarrepo
|
||||||
|
, hasKeyCheap = ddarLocal ddarrepo
|
||||||
|
, whereisKey = Nothing
|
||||||
|
, remoteFsck = Nothing
|
||||||
|
, repairRepo = Nothing
|
||||||
|
, config = c
|
||||||
|
, repo = r
|
||||||
|
, gitconfig = gc
|
||||||
|
, localpath = if ddarLocal ddarrepo && not (null ddarrepo)
|
||||||
|
then Just ddarrepo
|
||||||
|
else Nothing
|
||||||
|
, remotetype = remote
|
||||||
|
, availability = if ddarLocal ddarrepo then LocallyAvailable else GloballyAvailable
|
||||||
|
, readonly = False
|
||||||
|
}
|
||||||
|
return $ Just $ encryptableRemote c
|
||||||
|
(storeEncrypted new ddarrepo)
|
||||||
|
(retrieveEncrypted ddarrepo)
|
||||||
|
new
|
||||||
|
where
|
||||||
|
ddarrepo = fromMaybe (error "missing ddarrepo") $ remoteAnnexDdarRepo gc
|
||||||
|
|
||||||
|
ddarSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID)
|
||||||
|
ddarSetup mu _ c = do
|
||||||
|
u <- maybe (liftIO genUUID) return mu
|
||||||
|
|
||||||
|
-- verify configuration is sane
|
||||||
|
let ddarrepo = fromMaybe (error "Specify ddarrepo=") $
|
||||||
|
M.lookup "ddarrepo" c
|
||||||
|
c' <- encryptionSetup c
|
||||||
|
|
||||||
|
-- The ddarrepo is stored in git config, as well as this repo's
|
||||||
|
-- persistant state, so it can vary between hosts.
|
||||||
|
gitConfigSpecialRemote u c' "ddarrepo" ddarrepo
|
||||||
|
|
||||||
|
return (c', u)
|
||||||
|
|
||||||
|
pipeDdar :: [CommandParam] -> Maybe Handle -> Maybe Handle -> IO Bool
|
||||||
|
pipeDdar params inh outh = do
|
||||||
|
p <- runProcess "ddar" (toCommand params)
|
||||||
|
Nothing Nothing inh outh Nothing
|
||||||
|
ok <- waitForProcess p
|
||||||
|
case ok of
|
||||||
|
ExitSuccess -> return True
|
||||||
|
_ -> return False
|
||||||
|
|
||||||
|
store :: DdarRepo -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
|
||||||
|
store ddarrepo k _f _p = sendAnnex k (void $ remove ddarrepo k) $ \src -> do
|
||||||
|
let params =
|
||||||
|
[ Param "c"
|
||||||
|
, Param "-N"
|
||||||
|
, Param $ key2file k
|
||||||
|
, Param ddarrepo
|
||||||
|
, File src
|
||||||
|
]
|
||||||
|
liftIO $ boolSystem "ddar" params
|
||||||
|
|
||||||
|
storeEncrypted :: Remote -> DdarRepo -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
|
||||||
|
storeEncrypted r ddarrepo (cipher, enck) k _p =
|
||||||
|
sendAnnex k (void $ remove ddarrepo k) $ \src ->
|
||||||
|
liftIO $ catchBoolIO $
|
||||||
|
encrypt (getGpgEncParams r) cipher (feedFile src) $ \h ->
|
||||||
|
pipeDdar params (Just h) Nothing
|
||||||
|
where
|
||||||
|
params =
|
||||||
|
[ Param "c"
|
||||||
|
, Param "-N"
|
||||||
|
, Param $ key2file enck
|
||||||
|
, Param ddarrepo
|
||||||
|
, Param "-"
|
||||||
|
]
|
||||||
|
|
||||||
|
{- Convert remote DdarRepo to host and path on remote end -}
|
||||||
|
splitRemoteDdarRepo :: DdarRepo -> (String, String)
|
||||||
|
splitRemoteDdarRepo ddarrepo =
|
||||||
|
(host, ddarrepo')
|
||||||
|
where
|
||||||
|
(host, remainder) = span (/= ':') ddarrepo
|
||||||
|
ddarrepo' = drop 1 remainder
|
||||||
|
|
||||||
|
{- Return the command and parameters to use for a ddar call that may need to be
|
||||||
|
- made on a remote repository. This will call ssh if needed. -}
|
||||||
|
|
||||||
|
ddarRemoteCall :: DdarRepo -> Char -> [CommandParam] -> Annex (String, [CommandParam])
|
||||||
|
ddarRemoteCall ddarrepo cmd params
|
||||||
|
| ddarLocal ddarrepo = return ("ddar", localParams)
|
||||||
|
| otherwise = do
|
||||||
|
remoteCachingParams <- sshCachingOptions (host, Nothing) []
|
||||||
|
return ("ssh", remoteCachingParams ++ remoteParams)
|
||||||
|
where
|
||||||
|
(host, ddarrepo') = splitRemoteDdarRepo ddarrepo
|
||||||
|
localParams = Param [cmd] : Param ddarrepo : params
|
||||||
|
remoteParams = Param host : Param "ddar" : Param [cmd] : Param ddarrepo' : params
|
||||||
|
|
||||||
|
{- Specialized ddarRemoteCall that includes extraction command and flags -}
|
||||||
|
|
||||||
|
ddarExtractRemoteCall :: DdarRepo -> Key -> Annex (String, [CommandParam])
|
||||||
|
ddarExtractRemoteCall ddarrepo k =
|
||||||
|
ddarRemoteCall ddarrepo 'x' [Param "--force-stdout", Param $ key2file k]
|
||||||
|
|
||||||
|
retrieve :: DdarRepo -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool
|
||||||
|
retrieve ddarrepo k _f d _p = do
|
||||||
|
(cmd, params) <- ddarExtractRemoteCall ddarrepo k
|
||||||
|
liftIO $ catchBoolIO $ withFile d WriteMode $ \h -> do
|
||||||
|
let p = (proc cmd $ toCommand params){ std_out = UseHandle h }
|
||||||
|
(_, _, _, pid) <- Common.Annex.createProcess p
|
||||||
|
forceSuccessProcess p pid
|
||||||
|
return True
|
||||||
|
|
||||||
|
retrieveCheap :: Key -> FilePath -> Annex Bool
|
||||||
|
retrieveCheap _ _ = return False
|
||||||
|
|
||||||
|
retrieveEncrypted :: DdarRepo -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool
|
||||||
|
retrieveEncrypted ddarrepo (cipher, enck) _ f _p = do
|
||||||
|
(cmd, params) <- ddarExtractRemoteCall ddarrepo enck
|
||||||
|
let p = proc cmd $ toCommand params
|
||||||
|
liftIO $ catchBoolIO $ withHandle StdoutHandle createProcessSuccess p $ \h -> do
|
||||||
|
decrypt cipher (\toh -> L.hPut toh =<< L.hGetContents h) $
|
||||||
|
readBytes $ L.writeFile f
|
||||||
|
return True
|
||||||
|
|
||||||
|
remove :: DdarRepo -> Key -> Annex Bool
|
||||||
|
remove ddarrepo key = do
|
||||||
|
(cmd, params) <- ddarRemoteCall ddarrepo 'd' [Param $ key2file key]
|
||||||
|
liftIO $ boolSystem cmd params
|
||||||
|
|
||||||
|
ddarDirectoryExists :: DdarRepo -> Annex (Either String Bool)
|
||||||
|
ddarDirectoryExists ddarrepo
|
||||||
|
| ddarLocal ddarrepo = do
|
||||||
|
maybeStatus <- liftIO $ tryJust (guard . isDoesNotExistError) $ getFileStatus ddarrepo
|
||||||
|
return $ case maybeStatus of
|
||||||
|
Left _ -> Right False
|
||||||
|
Right status -> Right $ isDirectory status
|
||||||
|
| otherwise = do
|
||||||
|
sshCachingParams <- sshCachingOptions (host, Nothing) []
|
||||||
|
exitCode <- liftIO $ safeSystem "ssh" $ sshCachingParams ++ params
|
||||||
|
case exitCode of
|
||||||
|
ExitSuccess -> return $ Right True
|
||||||
|
ExitFailure 1 -> return $ Right False
|
||||||
|
ExitFailure code -> return $ Left $ "ssh call " ++
|
||||||
|
show (Data.String.Utils.join " " $ toCommand params) ++
|
||||||
|
" failed with status " ++ show code
|
||||||
|
where
|
||||||
|
(host, ddarrepo') = splitRemoteDdarRepo ddarrepo
|
||||||
|
params =
|
||||||
|
[ Param host
|
||||||
|
, Param "test"
|
||||||
|
, Param "-d"
|
||||||
|
, Param ddarrepo'
|
||||||
|
]
|
||||||
|
|
||||||
|
{- Use "ddar t" to determine if a given key is present in a ddar archive -}
|
||||||
|
inDdarManifest :: DdarRepo -> Key -> Annex (Either String Bool)
|
||||||
|
inDdarManifest ddarrepo k = do
|
||||||
|
(cmd, params) <- ddarRemoteCall ddarrepo 't' []
|
||||||
|
let p = proc cmd $ toCommand params
|
||||||
|
liftIO $ catchMsgIO $ withHandle StdoutHandle createProcessSuccess p $ \h -> do
|
||||||
|
contents <- hGetContents h
|
||||||
|
return $ elem k' $ lines contents
|
||||||
|
where
|
||||||
|
k' = key2file k
|
||||||
|
|
||||||
|
checkPresent :: DdarRepo -> Key -> Annex (Either String Bool)
|
||||||
|
checkPresent ddarrepo key = do
|
||||||
|
directoryExists <- ddarDirectoryExists ddarrepo
|
||||||
|
case directoryExists of
|
||||||
|
Left e -> return $ Left e
|
||||||
|
Right True -> inDdarManifest ddarrepo key
|
||||||
|
Right False -> return $ Right False
|
||||||
|
|
||||||
|
ddarLocal :: DdarRepo -> Bool
|
||||||
|
ddarLocal = notElem ':'
|
|
@ -38,6 +38,7 @@ import qualified Remote.WebDAV
|
||||||
import qualified Remote.Tahoe
|
import qualified Remote.Tahoe
|
||||||
#endif
|
#endif
|
||||||
import qualified Remote.Glacier
|
import qualified Remote.Glacier
|
||||||
|
import qualified Remote.Ddar
|
||||||
import qualified Remote.Hook
|
import qualified Remote.Hook
|
||||||
import qualified Remote.External
|
import qualified Remote.External
|
||||||
|
|
||||||
|
@ -59,6 +60,7 @@ remoteTypes =
|
||||||
, Remote.Tahoe.remote
|
, Remote.Tahoe.remote
|
||||||
#endif
|
#endif
|
||||||
, Remote.Glacier.remote
|
, Remote.Glacier.remote
|
||||||
|
, Remote.Ddar.remote
|
||||||
, Remote.Hook.remote
|
, Remote.Hook.remote
|
||||||
, Remote.External.remote
|
, Remote.External.remote
|
||||||
]
|
]
|
||||||
|
|
|
@ -131,6 +131,7 @@ data RemoteGitConfig = RemoteGitConfig
|
||||||
, remoteAnnexBupSplitOptions :: [String]
|
, remoteAnnexBupSplitOptions :: [String]
|
||||||
, remoteAnnexDirectory :: Maybe FilePath
|
, remoteAnnexDirectory :: Maybe FilePath
|
||||||
, remoteAnnexGCrypt :: Maybe String
|
, remoteAnnexGCrypt :: Maybe String
|
||||||
|
, remoteAnnexDdarRepo :: Maybe String
|
||||||
, remoteAnnexHookType :: Maybe String
|
, remoteAnnexHookType :: Maybe String
|
||||||
, remoteAnnexExternalType :: Maybe String
|
, remoteAnnexExternalType :: Maybe String
|
||||||
{- A regular git remote's git repository config. -}
|
{- A regular git remote's git repository config. -}
|
||||||
|
@ -162,6 +163,7 @@ extractRemoteGitConfig r remotename = RemoteGitConfig
|
||||||
, remoteAnnexBupSplitOptions = getoptions "bup-split-options"
|
, remoteAnnexBupSplitOptions = getoptions "bup-split-options"
|
||||||
, remoteAnnexDirectory = notempty $ getmaybe "directory"
|
, remoteAnnexDirectory = notempty $ getmaybe "directory"
|
||||||
, remoteAnnexGCrypt = notempty $ getmaybe "gcrypt"
|
, remoteAnnexGCrypt = notempty $ getmaybe "gcrypt"
|
||||||
|
, remoteAnnexDdarRepo = getmaybe "ddarrepo"
|
||||||
, remoteAnnexHookType = notempty $ getmaybe "hooktype"
|
, remoteAnnexHookType = notempty $ getmaybe "hooktype"
|
||||||
, remoteAnnexExternalType = notempty $ getmaybe "externaltype"
|
, remoteAnnexExternalType = notempty $ getmaybe "externaltype"
|
||||||
, remoteGitConfig = Nothing
|
, remoteGitConfig = Nothing
|
||||||
|
|
|
@ -1647,6 +1647,12 @@ Here are all the supported configuration settings.
|
||||||
the location of the bup repository to use. Normally this is automatically
|
the location of the bup repository to use. Normally this is automatically
|
||||||
set up by `git annex initremote`, but you can change it if needed.
|
set up by `git annex initremote`, but you can change it if needed.
|
||||||
|
|
||||||
|
* `remote.<name>.ddarrepo`
|
||||||
|
|
||||||
|
Used by ddar special remotes, this configures
|
||||||
|
the location of the ddar repository to use. Normally this is automatically
|
||||||
|
set up by `git annex initremote`, but you can change it if needed.
|
||||||
|
|
||||||
* `remote.<name>.directory`
|
* `remote.<name>.directory`
|
||||||
|
|
||||||
Used by directory special remotes, this configures
|
Used by directory special remotes, this configures
|
||||||
|
|
|
@ -10,6 +10,7 @@ They cannot be used by other git commands though.
|
||||||
* [[S3]] (Amazon S3, and other compatible services)
|
* [[S3]] (Amazon S3, and other compatible services)
|
||||||
* [[Amazon_Glacier|glacier]]
|
* [[Amazon_Glacier|glacier]]
|
||||||
* [[bup]]
|
* [[bup]]
|
||||||
|
* [[ddar]]
|
||||||
* [[gcrypt]] (encrypted git repositories!)
|
* [[gcrypt]] (encrypted git repositories!)
|
||||||
* [[directory]]
|
* [[directory]]
|
||||||
* [[rsync]]
|
* [[rsync]]
|
||||||
|
|
40
doc/special_remotes/ddar.mdwn
Normal file
40
doc/special_remotes/ddar.mdwn
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
This special remote type stores file contents in a
|
||||||
|
[ddar](https://github.com/basak/ddar) repository. This provides easy
|
||||||
|
de-duplication when you use git-annex to manage many files that are similar.
|
||||||
|
|
||||||
|
Unlike bup, ddar uses its own storage format, which allows for both creation
|
||||||
|
and deletion of de-deduplicated files. In addition to using local storage, ddar
|
||||||
|
archives can be remote, providing that ddar is installed on the remote machine
|
||||||
|
and ssh is available to it.
|
||||||
|
|
||||||
|
See [[walkthrough/using_ddar]] for usage examples.
|
||||||
|
|
||||||
|
## encryption
|
||||||
|
|
||||||
|
Encryption is nominally supported, but is not useful. Since effective
|
||||||
|
encryption necessarily obfuscates file contents, similar areas across different
|
||||||
|
files are no longer visible to ddar and cannot be de-duplicated.
|
||||||
|
|
||||||
|
## compression
|
||||||
|
|
||||||
|
The same caveat with encryption also generally applies to compression, since
|
||||||
|
file compression changes file contents such that similar regions across files
|
||||||
|
no longer appear similar. An exception is `gzip --rsyncable`, which is
|
||||||
|
specifically designed to work around this issue. This is the only compression
|
||||||
|
mechanism with which de-duplication remains effective.
|
||||||
|
|
||||||
|
## configuration
|
||||||
|
|
||||||
|
These parameters can be passed to `git annex initremote` to configure ddar:
|
||||||
|
|
||||||
|
* `encryption` - One of "none", "hybrid", "shared", or "pubkey".
|
||||||
|
See [[encryption]]. However, note that encryption renders all de-duplication
|
||||||
|
ineffective.
|
||||||
|
|
||||||
|
* `keyid` - Specifies the gpg key to use for [[encryption]].
|
||||||
|
|
||||||
|
* `ddarrepo` - Required. This is passed to `ddar` as the path to the ddar
|
||||||
|
archive to use. If it doesn't exist, the ddar repository will be created
|
||||||
|
automatically when a file is first copied to it. To use a remote ddar
|
||||||
|
repository, use a colon (`:`) to separate the hostname from the remote path.
|
||||||
|
Example: "ddarrepo=example.com:/big/myddar" or "ddarrepo=/big/myddar"
|
32
doc/walkthrough/using_ddar.mdwn
Normal file
32
doc/walkthrough/using_ddar.mdwn
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
Another [[special_remote|special_remotes]] that git-annex can use is
|
||||||
|
a [[special_remotes/ddar]] repository. ddar stores large file contents
|
||||||
|
in a directory structure of its own, with deduplication. For remote
|
||||||
|
repositories, ddar requires that ssh is available on the remote, with ddar also
|
||||||
|
installed remotely. When copying files to the remote, ddar only needs to send
|
||||||
|
over the network the parts of the files that are not already present remotely.
|
||||||
|
|
||||||
|
Unlike bup, ddar uses its own storage format, which allows for both creation
|
||||||
|
and deletion of de-deduplicated files.
|
||||||
|
|
||||||
|
Here's how to create a ddar remote, and describe it.
|
||||||
|
|
||||||
|
[[!template id=note text="""
|
||||||
|
Instead of specifying a remote system, you could choose to make a bup
|
||||||
|
remote that is only accessible on the current system, by passing
|
||||||
|
"ddarrepo=/big/myddar".
|
||||||
|
"""]]
|
||||||
|
|
||||||
|
# git annex initremote myddar type=ddar encryption=none ddarrepo=example.com:/big/myddar
|
||||||
|
initremote ddar (bup init)
|
||||||
|
Initialized empty Git repository in /big/myddar/
|
||||||
|
ok
|
||||||
|
# git annex describe myddar "my bup repository at example.com"
|
||||||
|
describe myddar ok
|
||||||
|
|
||||||
|
Now the remote can be used like any other remote.
|
||||||
|
|
||||||
|
# git annex move my_cool_big_file --to myddar
|
||||||
|
move my_cool_big_file (to myddar...)
|
||||||
|
ok
|
||||||
|
|
||||||
|
See [[special_remotes/bup]] for details.
|
Loading…
Reference in a new issue