git annex get from exports

Straightforward enough, except for the needed belt-and-suspenders sanity
checks to avoid foot shooting due to exports not being key/value stores.

* Even when annex.verify=false, always verify from exports.
* Only get files from exports that use a backend that supports
  checksum verification.
* Never trust exports, even if the user says to, because then
  `git annex drop` would drop content if the export seemed to contain
  a copy.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-09-04 16:39:56 -04:00
parent 4da763439b
commit 662f2a5ee7
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 90 additions and 21 deletions

View file

@ -354,8 +354,12 @@ shouldVerify :: VerifyConfig -> Annex Bool
shouldVerify AlwaysVerify = return True
shouldVerify NoVerify = return False
shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig
shouldVerify (RemoteVerify r) = shouldVerify DefaultVerify
<&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))
shouldVerify (RemoteVerify r) =
(shouldVerify DefaultVerify
<&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r)))
-- Export remotes are not key/value stores, so always verify
-- content from them even when verification is disabled.
<||> Types.Remote.exportSupported (Types.Remote.exportActions r)
{- Checks if there is enough free disk space to download a key
- to its temp file.

View file

@ -65,10 +65,16 @@ trustMap = maybe trustMapLoad return =<< Annex.getState Annex.trustmap
trustMapLoad :: Annex TrustMap
trustMapLoad = do
overrides <- Annex.getState Annex.forcetrust
l <- remoteList
-- Exports are never trusted, since they are not key/value stores.
exports <- filterM (Types.Remote.exportSupported . Types.Remote.exportActions) l
let exportoverrides = M.fromList $
map (\r -> (Types.Remote.uuid r, UnTrusted)) exports
logged <- trustMapRaw
configured <- M.fromList . catMaybes
<$> (map configuredtrust <$> remoteList)
let m = M.union overrides $ M.union configured logged
let configured = M.fromList $ mapMaybe configuredtrust l
let m = M.union exportoverrides $
M.union overrides $
M.union configured logged
Annex.changeState $ \s -> s { Annex.trustmap = Just m }
return m
where

View file

@ -44,7 +44,7 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot
gen r u c gc = do
cst <- remoteCost gc cheapRemoteCost
let chunkconfig = getChunkConfig c
return $ Just $ exportableRemote $ specialRemote c
exportableRemote $ specialRemote c
(prepareStore dir chunkconfig)
(retrieve dir chunkconfig)
(simplyPrepare $ remove dir)

View file

@ -8,9 +8,15 @@
module Remote.Helper.Export where
import Annex.Common
import qualified Annex
import Types.Remote
import Types.Creds
import Types.Backend
import Types.Key
import Types.TrustLevel
import Backend
import Remote.Helper.Encryptable (isEncrypted)
import Database.Export
import qualified Data.Map as M
@ -27,15 +33,59 @@ exportUnsupported = ExportActions
-- | A remote that supports exports when configured with exporttree=yes,
-- and otherwise does not.
exportableRemote :: Remote -> Remote
exportableRemote :: Remote -> Annex (Maybe Remote)
exportableRemote r = case M.lookup "exporttree" (config r) of
Just "yes" -> r
{ storeKey = \_ _ _ -> do
warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
return False
}
_ -> r
{ exportActions = exportUnsupported }
Just "yes" -> do
db <- openDb (uuid r)
return $ Just $ r
-- Storing a key on an export would need a way to
-- look up the file(s) that the currently exported
-- tree uses for a key; there's not currently an
-- inexpensive way to do that (getExportLocation
-- only finds files that have been stored on the
-- export already).
{ storeKey = \_ _ _ -> do
warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
return False
-- Keys can be retrieved, but since an export
-- is not a true key/value store, the content of
-- the key has to be able to be strongly verified.
, retrieveKeyFile = \k _af dest p ->
if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k))
then do
locs <- liftIO $ getExportLocation db k
case locs of
[] -> do
warning "unknown export location"
return (False, UnVerified)
(l:_) -> retrieveExport (exportActions r) k l dest p
else do
warning $ "exported content cannot be verified due to using the " ++ formatKeyVariety (keyVariety k) ++ " backend"
return (False, UnVerified)
, retrieveKeyFileCheap = \_ _ _ -> return False
-- Remove all files a key was exported to.
, removeKey = \k -> do
locs <- liftIO $ getExportLocation db k
oks <- forM locs $ \loc -> do
ok <- removeExport (exportActions r) k loc
when ok $
liftIO $ removeExportLocation db k loc
return ok
liftIO $ flushDbQueue db
return (and oks)
-- Can't lock content on exports, since they're
-- not key/value stores, and someone else could
-- change what's exported to a file at any time.
, lockContent = Nothing
-- Check if any of the files a key was exported
-- to are present. This doesn't guarantee the
-- export contains the right content.
, checkPresent = \k ->
anyM (checkPresentExport (exportActions r) k)
=<< liftIO (getExportLocation db k)
}
_ -> return $ Just $ r { exportActions = exportUnsupported }
exportableRemoteSetup :: (SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)) -> SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
exportableRemoteSetup setupaction st mu cp c gc = case st of

View file

@ -159,7 +159,7 @@ unVerified a = do
-- The FilePath will be relative, and may contain unix-style path
-- separators.
newtype ExportLocation = ExportLocation FilePath
deriving (Eq)
deriving (Show, Eq)
data ExportActions a = ExportActions
{ exportSupported :: a Bool

View file

@ -21,7 +21,7 @@ import Types.UUID
-- This order may seem backwards, but we generally want to list dead
-- remotes last and trusted ones first.
data TrustLevel = Trusted | SemiTrusted | UnTrusted | DeadTrusted
deriving (Eq, Enum, Ord, Bounded)
deriving (Eq, Enum, Ord, Bounded, Show)
instance Default TrustLevel where
def = SemiTrusted

View file

@ -11,11 +11,11 @@ git annex export `treeish --to remote`
Use this command to export a tree of files from a git-annex repository.
Normally files are stored on a git-annex special remote named by their
keys. That is great for data storage, but your filenames are obscured.
Exporting replicates the tree to the special remote as-is.
keys. That is great for reliable data storage, but your filenames are
obscured. Exporting replicates the tree to the special remote as-is.
Mixing key/value and exports in the same remote would be a mess and so is
not allowed. So, you have to configure a remote with `exporttree=yes`
Mixing key/value storage and exports in the same remote would be a mess and
so is not allowed. You have to configure a remote with `exporttree=yes`
when initially setting it up with [[git-annex-initremote]](1).
Repeated exports are done efficiently, by diffing the old and new tree,
@ -24,6 +24,13 @@ and transferring only the changed files.
Exports can be interrupted and resumed. However, partially uploaded files
will be re-started from the beginning.
Once content has been exported to a remote, commands like `git annex get`
can download content from there the same as from other remotes. However,
since an export is not a key/value store, git-annex has to do more
verification of content downloaded from an export. Some types of keys,
that are not based on checksums, cannot be downloaded from an export.
And, git-annex will never trust an export to retain the content of a key.
# SEE ALSO
[[git-annex]](1)

View file

@ -17,7 +17,9 @@ there need to be a new interface in supported remotes?
Work is in progress. Todo list:
* Use retrieveExport when getting from export remotes.
* `git annex get --from export` works in the repo that exported to it,
but in another repo, the export db won't be populated, so it won't work.
Maybe just show a useful error message in this case?
* Efficient handling of renames.
* Support export to aditional special remotes (S3 etc)
* Support export to external special remotes.