git annex get from exports
Straightforward enough, except for the needed belt-and-suspenders sanity checks to avoid foot shooting due to exports not being key/value stores. * Even when annex.verify=false, always verify from exports. * Only get files from exports that use a backend that supports checksum verification. * Never trust exports, even if the user says to, because then `git annex drop` would drop content if the export seemed to contain a copy. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
4da763439b
commit
662f2a5ee7
8 changed files with 90 additions and 21 deletions
|
@ -354,8 +354,12 @@ shouldVerify :: VerifyConfig -> Annex Bool
|
||||||
shouldVerify AlwaysVerify = return True
|
shouldVerify AlwaysVerify = return True
|
||||||
shouldVerify NoVerify = return False
|
shouldVerify NoVerify = return False
|
||||||
shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig
|
shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig
|
||||||
shouldVerify (RemoteVerify r) = shouldVerify DefaultVerify
|
shouldVerify (RemoteVerify r) =
|
||||||
<&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))
|
(shouldVerify DefaultVerify
|
||||||
|
<&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r)))
|
||||||
|
-- Export remotes are not key/value stores, so always verify
|
||||||
|
-- content from them even when verification is disabled.
|
||||||
|
<||> Types.Remote.exportSupported (Types.Remote.exportActions r)
|
||||||
|
|
||||||
{- Checks if there is enough free disk space to download a key
|
{- Checks if there is enough free disk space to download a key
|
||||||
- to its temp file.
|
- to its temp file.
|
||||||
|
|
|
@ -65,10 +65,16 @@ trustMap = maybe trustMapLoad return =<< Annex.getState Annex.trustmap
|
||||||
trustMapLoad :: Annex TrustMap
|
trustMapLoad :: Annex TrustMap
|
||||||
trustMapLoad = do
|
trustMapLoad = do
|
||||||
overrides <- Annex.getState Annex.forcetrust
|
overrides <- Annex.getState Annex.forcetrust
|
||||||
|
l <- remoteList
|
||||||
|
-- Exports are never trusted, since they are not key/value stores.
|
||||||
|
exports <- filterM (Types.Remote.exportSupported . Types.Remote.exportActions) l
|
||||||
|
let exportoverrides = M.fromList $
|
||||||
|
map (\r -> (Types.Remote.uuid r, UnTrusted)) exports
|
||||||
logged <- trustMapRaw
|
logged <- trustMapRaw
|
||||||
configured <- M.fromList . catMaybes
|
let configured = M.fromList $ mapMaybe configuredtrust l
|
||||||
<$> (map configuredtrust <$> remoteList)
|
let m = M.union exportoverrides $
|
||||||
let m = M.union overrides $ M.union configured logged
|
M.union overrides $
|
||||||
|
M.union configured logged
|
||||||
Annex.changeState $ \s -> s { Annex.trustmap = Just m }
|
Annex.changeState $ \s -> s { Annex.trustmap = Just m }
|
||||||
return m
|
return m
|
||||||
where
|
where
|
||||||
|
|
|
@ -44,7 +44,7 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot
|
||||||
gen r u c gc = do
|
gen r u c gc = do
|
||||||
cst <- remoteCost gc cheapRemoteCost
|
cst <- remoteCost gc cheapRemoteCost
|
||||||
let chunkconfig = getChunkConfig c
|
let chunkconfig = getChunkConfig c
|
||||||
return $ Just $ exportableRemote $ specialRemote c
|
exportableRemote $ specialRemote c
|
||||||
(prepareStore dir chunkconfig)
|
(prepareStore dir chunkconfig)
|
||||||
(retrieve dir chunkconfig)
|
(retrieve dir chunkconfig)
|
||||||
(simplyPrepare $ remove dir)
|
(simplyPrepare $ remove dir)
|
||||||
|
|
|
@ -8,9 +8,15 @@
|
||||||
module Remote.Helper.Export where
|
module Remote.Helper.Export where
|
||||||
|
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
|
import qualified Annex
|
||||||
import Types.Remote
|
import Types.Remote
|
||||||
import Types.Creds
|
import Types.Creds
|
||||||
|
import Types.Backend
|
||||||
|
import Types.Key
|
||||||
|
import Types.TrustLevel
|
||||||
|
import Backend
|
||||||
import Remote.Helper.Encryptable (isEncrypted)
|
import Remote.Helper.Encryptable (isEncrypted)
|
||||||
|
import Database.Export
|
||||||
|
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
|
||||||
|
@ -27,15 +33,59 @@ exportUnsupported = ExportActions
|
||||||
|
|
||||||
-- | A remote that supports exports when configured with exporttree=yes,
|
-- | A remote that supports exports when configured with exporttree=yes,
|
||||||
-- and otherwise does not.
|
-- and otherwise does not.
|
||||||
exportableRemote :: Remote -> Remote
|
exportableRemote :: Remote -> Annex (Maybe Remote)
|
||||||
exportableRemote r = case M.lookup "exporttree" (config r) of
|
exportableRemote r = case M.lookup "exporttree" (config r) of
|
||||||
Just "yes" -> r
|
Just "yes" -> do
|
||||||
{ storeKey = \_ _ _ -> do
|
db <- openDb (uuid r)
|
||||||
warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
|
|
||||||
return False
|
return $ Just $ r
|
||||||
}
|
-- Storing a key on an export would need a way to
|
||||||
_ -> r
|
-- look up the file(s) that the currently exported
|
||||||
{ exportActions = exportUnsupported }
|
-- tree uses for a key; there's not currently an
|
||||||
|
-- inexpensive way to do that (getExportLocation
|
||||||
|
-- only finds files that have been stored on the
|
||||||
|
-- export already).
|
||||||
|
{ storeKey = \_ _ _ -> do
|
||||||
|
warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
|
||||||
|
return False
|
||||||
|
-- Keys can be retrieved, but since an export
|
||||||
|
-- is not a true key/value store, the content of
|
||||||
|
-- the key has to be able to be strongly verified.
|
||||||
|
, retrieveKeyFile = \k _af dest p ->
|
||||||
|
if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k))
|
||||||
|
then do
|
||||||
|
locs <- liftIO $ getExportLocation db k
|
||||||
|
case locs of
|
||||||
|
[] -> do
|
||||||
|
warning "unknown export location"
|
||||||
|
return (False, UnVerified)
|
||||||
|
(l:_) -> retrieveExport (exportActions r) k l dest p
|
||||||
|
else do
|
||||||
|
warning $ "exported content cannot be verified due to using the " ++ formatKeyVariety (keyVariety k) ++ " backend"
|
||||||
|
return (False, UnVerified)
|
||||||
|
, retrieveKeyFileCheap = \_ _ _ -> return False
|
||||||
|
-- Remove all files a key was exported to.
|
||||||
|
, removeKey = \k -> do
|
||||||
|
locs <- liftIO $ getExportLocation db k
|
||||||
|
oks <- forM locs $ \loc -> do
|
||||||
|
ok <- removeExport (exportActions r) k loc
|
||||||
|
when ok $
|
||||||
|
liftIO $ removeExportLocation db k loc
|
||||||
|
return ok
|
||||||
|
liftIO $ flushDbQueue db
|
||||||
|
return (and oks)
|
||||||
|
-- Can't lock content on exports, since they're
|
||||||
|
-- not key/value stores, and someone else could
|
||||||
|
-- change what's exported to a file at any time.
|
||||||
|
, lockContent = Nothing
|
||||||
|
-- Check if any of the files a key was exported
|
||||||
|
-- to are present. This doesn't guarantee the
|
||||||
|
-- export contains the right content.
|
||||||
|
, checkPresent = \k ->
|
||||||
|
anyM (checkPresentExport (exportActions r) k)
|
||||||
|
=<< liftIO (getExportLocation db k)
|
||||||
|
}
|
||||||
|
_ -> return $ Just $ r { exportActions = exportUnsupported }
|
||||||
|
|
||||||
exportableRemoteSetup :: (SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)) -> SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
|
exportableRemoteSetup :: (SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)) -> SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
|
||||||
exportableRemoteSetup setupaction st mu cp c gc = case st of
|
exportableRemoteSetup setupaction st mu cp c gc = case st of
|
||||||
|
|
|
@ -159,7 +159,7 @@ unVerified a = do
|
||||||
-- The FilePath will be relative, and may contain unix-style path
|
-- The FilePath will be relative, and may contain unix-style path
|
||||||
-- separators.
|
-- separators.
|
||||||
newtype ExportLocation = ExportLocation FilePath
|
newtype ExportLocation = ExportLocation FilePath
|
||||||
deriving (Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
data ExportActions a = ExportActions
|
data ExportActions a = ExportActions
|
||||||
{ exportSupported :: a Bool
|
{ exportSupported :: a Bool
|
||||||
|
|
|
@ -21,7 +21,7 @@ import Types.UUID
|
||||||
-- This order may seem backwards, but we generally want to list dead
|
-- This order may seem backwards, but we generally want to list dead
|
||||||
-- remotes last and trusted ones first.
|
-- remotes last and trusted ones first.
|
||||||
data TrustLevel = Trusted | SemiTrusted | UnTrusted | DeadTrusted
|
data TrustLevel = Trusted | SemiTrusted | UnTrusted | DeadTrusted
|
||||||
deriving (Eq, Enum, Ord, Bounded)
|
deriving (Eq, Enum, Ord, Bounded, Show)
|
||||||
|
|
||||||
instance Default TrustLevel where
|
instance Default TrustLevel where
|
||||||
def = SemiTrusted
|
def = SemiTrusted
|
||||||
|
|
|
@ -11,11 +11,11 @@ git annex export `treeish --to remote`
|
||||||
Use this command to export a tree of files from a git-annex repository.
|
Use this command to export a tree of files from a git-annex repository.
|
||||||
|
|
||||||
Normally files are stored on a git-annex special remote named by their
|
Normally files are stored on a git-annex special remote named by their
|
||||||
keys. That is great for data storage, but your filenames are obscured.
|
keys. That is great for reliable data storage, but your filenames are
|
||||||
Exporting replicates the tree to the special remote as-is.
|
obscured. Exporting replicates the tree to the special remote as-is.
|
||||||
|
|
||||||
Mixing key/value and exports in the same remote would be a mess and so is
|
Mixing key/value storage and exports in the same remote would be a mess and
|
||||||
not allowed. So, you have to configure a remote with `exporttree=yes`
|
so is not allowed. You have to configure a remote with `exporttree=yes`
|
||||||
when initially setting it up with [[git-annex-initremote]](1).
|
when initially setting it up with [[git-annex-initremote]](1).
|
||||||
|
|
||||||
Repeated exports are done efficiently, by diffing the old and new tree,
|
Repeated exports are done efficiently, by diffing the old and new tree,
|
||||||
|
@ -24,6 +24,13 @@ and transferring only the changed files.
|
||||||
Exports can be interrupted and resumed. However, partially uploaded files
|
Exports can be interrupted and resumed. However, partially uploaded files
|
||||||
will be re-started from the beginning.
|
will be re-started from the beginning.
|
||||||
|
|
||||||
|
Once content has been exported to a remote, commands like `git annex get`
|
||||||
|
can download content from there the same as from other remotes. However,
|
||||||
|
since an export is not a key/value store, git-annex has to do more
|
||||||
|
verification of content downloaded from an export. Some types of keys,
|
||||||
|
that are not based on checksums, cannot be downloaded from an export.
|
||||||
|
And, git-annex will never trust an export to retain the content of a key.
|
||||||
|
|
||||||
# SEE ALSO
|
# SEE ALSO
|
||||||
|
|
||||||
[[git-annex]](1)
|
[[git-annex]](1)
|
||||||
|
|
|
@ -17,7 +17,9 @@ there need to be a new interface in supported remotes?
|
||||||
|
|
||||||
Work is in progress. Todo list:
|
Work is in progress. Todo list:
|
||||||
|
|
||||||
* Use retrieveExport when getting from export remotes.
|
* `git annex get --from export` works in the repo that exported to it,
|
||||||
|
but in another repo, the export db won't be populated, so it won't work.
|
||||||
|
Maybe just show a useful error message in this case?
|
||||||
* Efficient handling of renames.
|
* Efficient handling of renames.
|
||||||
* Support export to aditional special remotes (S3 etc)
|
* Support export to aditional special remotes (S3 etc)
|
||||||
* Support export to external special remotes.
|
* Support export to external special remotes.
|
||||||
|
|
Loading…
Reference in a new issue