git annex get from exports
Straightforward enough, except for the needed belt-and-suspenders sanity checks to avoid foot shooting due to exports not being key/value stores. * Even when annex.verify=false, always verify from exports. * Only get files from exports that use a backend that supports checksum verification. * Never trust exports, even if the user says to, because then `git annex drop` would drop content if the export seemed to contain a copy. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
4da763439b
commit
662f2a5ee7
8 changed files with 90 additions and 21 deletions
|
@ -354,8 +354,12 @@ shouldVerify :: VerifyConfig -> Annex Bool
|
|||
shouldVerify AlwaysVerify = return True
|
||||
shouldVerify NoVerify = return False
|
||||
shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig
|
||||
shouldVerify (RemoteVerify r) = shouldVerify DefaultVerify
|
||||
<&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))
|
||||
shouldVerify (RemoteVerify r) =
|
||||
(shouldVerify DefaultVerify
|
||||
<&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r)))
|
||||
-- Export remotes are not key/value stores, so always verify
|
||||
-- content from them even when verification is disabled.
|
||||
<||> Types.Remote.exportSupported (Types.Remote.exportActions r)
|
||||
|
||||
{- Checks if there is enough free disk space to download a key
|
||||
- to its temp file.
|
||||
|
|
|
@ -65,10 +65,16 @@ trustMap = maybe trustMapLoad return =<< Annex.getState Annex.trustmap
|
|||
trustMapLoad :: Annex TrustMap
|
||||
trustMapLoad = do
|
||||
overrides <- Annex.getState Annex.forcetrust
|
||||
l <- remoteList
|
||||
-- Exports are never trusted, since they are not key/value stores.
|
||||
exports <- filterM (Types.Remote.exportSupported . Types.Remote.exportActions) l
|
||||
let exportoverrides = M.fromList $
|
||||
map (\r -> (Types.Remote.uuid r, UnTrusted)) exports
|
||||
logged <- trustMapRaw
|
||||
configured <- M.fromList . catMaybes
|
||||
<$> (map configuredtrust <$> remoteList)
|
||||
let m = M.union overrides $ M.union configured logged
|
||||
let configured = M.fromList $ mapMaybe configuredtrust l
|
||||
let m = M.union exportoverrides $
|
||||
M.union overrides $
|
||||
M.union configured logged
|
||||
Annex.changeState $ \s -> s { Annex.trustmap = Just m }
|
||||
return m
|
||||
where
|
||||
|
|
|
@ -44,7 +44,7 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot
|
|||
gen r u c gc = do
|
||||
cst <- remoteCost gc cheapRemoteCost
|
||||
let chunkconfig = getChunkConfig c
|
||||
return $ Just $ exportableRemote $ specialRemote c
|
||||
exportableRemote $ specialRemote c
|
||||
(prepareStore dir chunkconfig)
|
||||
(retrieve dir chunkconfig)
|
||||
(simplyPrepare $ remove dir)
|
||||
|
|
|
@ -8,9 +8,15 @@
|
|||
module Remote.Helper.Export where
|
||||
|
||||
import Annex.Common
|
||||
import qualified Annex
|
||||
import Types.Remote
|
||||
import Types.Creds
|
||||
import Types.Backend
|
||||
import Types.Key
|
||||
import Types.TrustLevel
|
||||
import Backend
|
||||
import Remote.Helper.Encryptable (isEncrypted)
|
||||
import Database.Export
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
||||
|
@ -27,15 +33,59 @@ exportUnsupported = ExportActions
|
|||
|
||||
-- | A remote that supports exports when configured with exporttree=yes,
|
||||
-- and otherwise does not.
|
||||
exportableRemote :: Remote -> Remote
|
||||
exportableRemote :: Remote -> Annex (Maybe Remote)
|
||||
exportableRemote r = case M.lookup "exporttree" (config r) of
|
||||
Just "yes" -> r
|
||||
{ storeKey = \_ _ _ -> do
|
||||
warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
|
||||
return False
|
||||
}
|
||||
_ -> r
|
||||
{ exportActions = exportUnsupported }
|
||||
Just "yes" -> do
|
||||
db <- openDb (uuid r)
|
||||
|
||||
return $ Just $ r
|
||||
-- Storing a key on an export would need a way to
|
||||
-- look up the file(s) that the currently exported
|
||||
-- tree uses for a key; there's not currently an
|
||||
-- inexpensive way to do that (getExportLocation
|
||||
-- only finds files that have been stored on the
|
||||
-- export already).
|
||||
{ storeKey = \_ _ _ -> do
|
||||
warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
|
||||
return False
|
||||
-- Keys can be retrieved, but since an export
|
||||
-- is not a true key/value store, the content of
|
||||
-- the key has to be able to be strongly verified.
|
||||
, retrieveKeyFile = \k _af dest p ->
|
||||
if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k))
|
||||
then do
|
||||
locs <- liftIO $ getExportLocation db k
|
||||
case locs of
|
||||
[] -> do
|
||||
warning "unknown export location"
|
||||
return (False, UnVerified)
|
||||
(l:_) -> retrieveExport (exportActions r) k l dest p
|
||||
else do
|
||||
warning $ "exported content cannot be verified due to using the " ++ formatKeyVariety (keyVariety k) ++ " backend"
|
||||
return (False, UnVerified)
|
||||
, retrieveKeyFileCheap = \_ _ _ -> return False
|
||||
-- Remove all files a key was exported to.
|
||||
, removeKey = \k -> do
|
||||
locs <- liftIO $ getExportLocation db k
|
||||
oks <- forM locs $ \loc -> do
|
||||
ok <- removeExport (exportActions r) k loc
|
||||
when ok $
|
||||
liftIO $ removeExportLocation db k loc
|
||||
return ok
|
||||
liftIO $ flushDbQueue db
|
||||
return (and oks)
|
||||
-- Can't lock content on exports, since they're
|
||||
-- not key/value stores, and someone else could
|
||||
-- change what's exported to a file at any time.
|
||||
, lockContent = Nothing
|
||||
-- Check if any of the files a key was exported
|
||||
-- to are present. This doesn't guarantee the
|
||||
-- export contains the right content.
|
||||
, checkPresent = \k ->
|
||||
anyM (checkPresentExport (exportActions r) k)
|
||||
=<< liftIO (getExportLocation db k)
|
||||
}
|
||||
_ -> return $ Just $ r { exportActions = exportUnsupported }
|
||||
|
||||
exportableRemoteSetup :: (SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)) -> SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
|
||||
exportableRemoteSetup setupaction st mu cp c gc = case st of
|
||||
|
|
|
@ -159,7 +159,7 @@ unVerified a = do
|
|||
-- The FilePath will be relative, and may contain unix-style path
|
||||
-- separators.
|
||||
newtype ExportLocation = ExportLocation FilePath
|
||||
deriving (Eq)
|
||||
deriving (Show, Eq)
|
||||
|
||||
data ExportActions a = ExportActions
|
||||
{ exportSupported :: a Bool
|
||||
|
|
|
@ -21,7 +21,7 @@ import Types.UUID
|
|||
-- This order may seem backwards, but we generally want to list dead
|
||||
-- remotes last and trusted ones first.
|
||||
data TrustLevel = Trusted | SemiTrusted | UnTrusted | DeadTrusted
|
||||
deriving (Eq, Enum, Ord, Bounded)
|
||||
deriving (Eq, Enum, Ord, Bounded, Show)
|
||||
|
||||
instance Default TrustLevel where
|
||||
def = SemiTrusted
|
||||
|
|
|
@ -11,11 +11,11 @@ git annex export `treeish --to remote`
|
|||
Use this command to export a tree of files from a git-annex repository.
|
||||
|
||||
Normally files are stored on a git-annex special remote named by their
|
||||
keys. That is great for data storage, but your filenames are obscured.
|
||||
Exporting replicates the tree to the special remote as-is.
|
||||
keys. That is great for reliable data storage, but your filenames are
|
||||
obscured. Exporting replicates the tree to the special remote as-is.
|
||||
|
||||
Mixing key/value and exports in the same remote would be a mess and so is
|
||||
not allowed. So, you have to configure a remote with `exporttree=yes`
|
||||
Mixing key/value storage and exports in the same remote would be a mess and
|
||||
so is not allowed. You have to configure a remote with `exporttree=yes`
|
||||
when initially setting it up with [[git-annex-initremote]](1).
|
||||
|
||||
Repeated exports are done efficiently, by diffing the old and new tree,
|
||||
|
@ -24,6 +24,13 @@ and transferring only the changed files.
|
|||
Exports can be interrupted and resumed. However, partially uploaded files
|
||||
will be re-started from the beginning.
|
||||
|
||||
Once content has been exported to a remote, commands like `git annex get`
|
||||
can download content from there the same as from other remotes. However,
|
||||
since an export is not a key/value store, git-annex has to do more
|
||||
verification of content downloaded from an export. Some types of keys,
|
||||
that are not based on checksums, cannot be downloaded from an export.
|
||||
And, git-annex will never trust an export to retain the content of a key.
|
||||
|
||||
# SEE ALSO
|
||||
|
||||
[[git-annex]](1)
|
||||
|
|
|
@ -17,7 +17,9 @@ there need to be a new interface in supported remotes?
|
|||
|
||||
Work is in progress. Todo list:
|
||||
|
||||
* Use retrieveExport when getting from export remotes.
|
||||
* `git annex get --from export` works in the repo that exported to it,
|
||||
but in another repo, the export db won't be populated, so it won't work.
|
||||
Maybe just show a useful error message in this case?
|
||||
* Efficient handling of renames.
|
||||
* Support export to aditional special remotes (S3 etc)
|
||||
* Support export to external special remotes.
|
||||
|
|
Loading…
Reference in a new issue