add thirdPartyPopulated interface

This is to support, eg a borg repo as a special remote, which is
populated not by running git-annex commands, but by using borg. Then
git-annex sync lists the content of the remote, learns which files are
annex objects, and treats those as present in the remote.

So, most of the import machinery is reused, to a new purpose. While
normally importtree maintains a remote tracking branch, this does not,
because the files stored in the remote are annex object files, not
user-visible filenames. But, internally, a git tree is still generated,
of the files on the remote that are annex objects. This tree is used
by retrieveExportWithContentIdentifier, etc. As with other import/export
remotes, that  the tree is recorded in the export log, and gets grafted
into the git-annex branch.

importKey changed to be able to return Nothing, to indicate when an
ImportLocation is not an annex object and so should be skipped from
being included in the tree.

It did not seem to make sense to have git-annex import do this, since
from the user's perspective, it's not like other imports. So only
git-annex sync does it.

Note that, git-annex sync does not yet download objects from such
remotes that are preferred content. importKeys is run with
content downloading disabled, to avoid getting the content of all
objects. Perhaps what's needed is for seekSyncContent to be run with these
remotes, but I don't know if it will just work (in particular, it needs
to avoid trying to transfer objects to them), so I skipped that for now.

(Untested and unused as of yet.)

This commit was sponsored by Jochen Bartl on Patreon.
This commit is contained in:
Joey Hess 2020-12-18 14:52:57 -04:00
parent 037f8b6863
commit 9a2c8757f3
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
23 changed files with 176 additions and 77 deletions

View file

@ -12,6 +12,7 @@ module Annex.Import (
ImportCommitConfig(..),
buildImportCommit,
buildImportTrees,
recordImportTree,
canImportKeys,
importKeys,
makeImportMatcher,
@ -104,6 +105,28 @@ buildImportCommit remote importtreeconfig importcommitconfig importable =
Just trackingcommit -> inRepo (Git.Ref.tree trackingcommit) >>= \case
Nothing -> go Nothing
Just _ -> go (Just trackingcommit)
where
go trackingcommit = do
(imported, updatestate) <- recordImportTree remote importtreeconfig importable
buildImportCommit' remote importcommitconfig trackingcommit imported >>= \case
Just finalcommit -> do
updatestate
return (Just finalcommit)
Nothing -> return Nothing
{- Builds a tree for an import from a special remote.
-
- Also returns an action that can be used to update
- all the other state to record the import.
-}
recordImportTree
:: Remote
-> ImportTreeConfig
-> ImportableContents (Either Sha Key)
-> Annex (History Sha, Annex ())
recordImportTree remote importtreeconfig importable = do
imported@(History finaltree _) <- buildImportTrees basetree subdir importable
return (imported, updatestate finaltree)
where
basetree = case importtreeconfig of
ImportTree -> emptyTree
@ -112,21 +135,12 @@ buildImportCommit remote importtreeconfig importcommitconfig importable =
ImportTree -> Nothing
ImportSubTree dir _ -> Just dir
go trackingcommit = do
imported@(History finaltree _) <-
buildImportTrees basetree subdir importable
buildImportCommit' remote importcommitconfig trackingcommit imported >>= \case
Just finalcommit -> do
updatestate finaltree
return (Just finalcommit)
Nothing -> return Nothing
updatestate committedtree = do
updatestate finaltree = do
importedtree <- case subdir of
Nothing -> pure committedtree
Nothing -> pure finaltree
Just dir ->
let subtreeref = Ref $
fromRef' committedtree
fromRef' finaltree
<> ":"
<> getTopFilePath dir
in fromMaybe emptyTree
@ -308,9 +322,10 @@ importKeys
:: Remote
-> ImportTreeConfig
-> Bool
-> Bool
-> ImportableContents (ContentIdentifier, ByteSize)
-> Annex (Maybe (ImportableContents (Either Sha Key)))
importKeys remote importtreeconfig importcontent importablecontents = do
importKeys remote importtreeconfig importcontent ignorelargefilesconfig importablecontents = do
unless (canImportKeys remote importcontent) $
giveup "This remote does not support importing without downloading content."
-- This map is used to remember content identifiers that
@ -400,7 +415,7 @@ importKeys remote importtreeconfig importcontent importablecontents = do
let act = if importcontent
then case Remote.importKey ia of
Nothing -> dodownload
Just _ -> if Utility.Matcher.introspect matchNeedsFileContent matcher
Just _ -> if not ignorelargefilesconfig && Utility.Matcher.introspect matchNeedsFileContent matcher
then dodownload
else doimport
else doimport
@ -410,7 +425,7 @@ importKeys remote importtreeconfig importcontent importablecontents = do
case Remote.importKey ia of
Nothing -> error "internal" -- checked earlier
Just importkey -> do
when (Utility.Matcher.introspect matchNeedsFileContent matcher) $
when (not ignorelargefilesconfig && Utility.Matcher.introspect matchNeedsFileContent matcher) $
giveup "annex.largefiles configuration examines file contents, so cannot import without content."
let mi = MatchingInfo ProvidedInfo
{ providedFilePath = f
@ -419,7 +434,9 @@ importKeys remote importtreeconfig importcontent importablecontents = do
, providedMimeType = Nothing
, providedMimeEncoding = Nothing
}
islargefile <- checkMatcher' matcher mi mempty
islargefile <- if ignorelargefilesconfig
then pure True
else checkMatcher' matcher mi mempty
metered Nothing sz $ const $ if islargefile
then doimportlarge importkey cidmap db loc cid sz f
else doimportsmall cidmap db loc cid sz
@ -433,24 +450,26 @@ importKeys remote importtreeconfig importcontent importablecontents = do
return Nothing
where
importer = do
unsizedk <- importkey loc cid
-- Don't display progress when generating
-- key, if the content will later be
-- downloaded, which is a more expensive
-- operation generally.
(if importcontent then nullMeterUpdate else p)
-- This avoids every remote needing
-- to add the size.
let k = alterKey unsizedk $ \kd -> kd
{ keySize = keySize kd <|> Just sz }
checkSecureHashes k >>= \case
Nothing -> do
recordcidkey cidmap db cid k
logChange k (Remote.uuid remote) InfoPresent
if importcontent
then getcontent k
else return (Just (k, True))
Just msg -> giveup (msg ++ " to import")
-- Don't display progress when generating
-- key, if the content will later be
-- downloaded, which is a more expensive
-- operation generally.
let p' = if importcontent then nullMeterUpdate else p
importkey loc cid p' >>= \case
Nothing -> return Nothing
Just unsizedk -> do
-- This avoids every remote needing
-- to add the size.
let k = alterKey unsizedk $ \kd -> kd
{ keySize = keySize kd <|> Just sz }
checkSecureHashes k >>= \case
Nothing -> do
recordcidkey cidmap db cid k
logChange k (Remote.uuid remote) InfoPresent
if importcontent
then getcontent k
else return (Just (k, True))
Just msg -> giveup (msg ++ " to import")
getcontent :: Key -> Annex (Maybe (Key, Bool))
getcontent k = do
@ -533,7 +552,9 @@ importKeys remote importtreeconfig importcontent importablecontents = do
, contentFile = Just tmpfile
, matchKey = Nothing
}
islargefile <- checkMatcher' matcher mi mempty
islargefile <- if ignorelargefilesconfig
then pure True
else checkMatcher' matcher mi mempty
if islargefile
then do
backend <- chooseBackend f

View file

@ -306,7 +306,7 @@ seekRemote remote branch msubdir importcontent ci = do
void $ includeCommandAction (listContents remote importtreeconfig ci importabletvar)
liftIO (atomically (readTVar importabletvar)) >>= \case
Nothing -> return ()
Just importable -> importKeys remote importtreeconfig importcontent importable >>= \case
Just importable -> importKeys remote importtreeconfig importcontent False importable >>= \case
Nothing -> warning $ concat
[ "Failed to import some files from "
, Remote.name remote

View file

@ -67,7 +67,7 @@ import Annex.UpdateInstead
import Annex.Export
import Annex.TaggedPush
import Annex.CurrentBranch
import Annex.Import (canImportKeys)
import Annex.Import
import Annex.CheckIgnore
import Types.FileMatcher
import qualified Database.Export as Export
@ -77,6 +77,7 @@ import Utility.Process.Transcript
import Utility.Tuple
import Control.Concurrent.MVar
import Control.Concurrent.STM
import qualified Data.Map as M
import qualified Data.ByteString as S
import Data.Char
@ -463,6 +464,9 @@ pullRemote o mergeconfig remote branch = stopUnless (pure $ pullOption o && want
importRemote :: Bool -> SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> CommandSeek
importRemote importcontent o mergeconfig remote currbranch
| not (pullOption o) || not wantpull = noop
| Remote.thirdPartyPopulated (Remote.remotetype remote) =
when (canImportKeys remote importcontent) $
importThirdPartyPopulated remote
| otherwise = case remoteAnnexTrackingBranch (Remote.gitconfig remote) of
Nothing -> noop
Just tb -> do
@ -479,6 +483,34 @@ importRemote importcontent o mergeconfig remote currbranch
where
wantpull = remoteAnnexPull (Remote.gitconfig remote)
{- Import from a remote that is populated by a third party, by listing
- the contents of the remote, and then adding only the files on it that
- importKey identifies to a tree. The tree is only used to keep track
- of where keys are located on the remote, no remote tracking branch is
- updated, because the filenames are the names of annex object files,
- not suitable for a tracking branch. Does not transfer any content. -}
importThirdPartyPopulated :: Remote -> CommandSeek
importThirdPartyPopulated remote = do
importabletvar <- liftIO $ newTVarIO Nothing
void $ includeCommandAction (Command.Import.listContents remote ImportTree (CheckGitIgnore False) importabletvar)
liftIO (atomically (readTVar importabletvar)) >>= \case
Nothing -> return ()
Just importable ->
importKeys remote ImportTree False False importable >>= \case
Just importablekeys -> go importablekeys
Nothing -> warning $ concat
[ "Failed to import from"
, Remote.name remote
]
where
go importablekeys = void $ includeCommandAction $ starting "pull" ai si $ do
(_imported, updatestate) <- recordImportTree remote ImportTree importablekeys
next $ do
updatestate
return True
ai = ActionItemOther (Just (Remote.name remote))
si = SeekInput []
{- The remote probably has both a master and a synced/master branch.
- Which to merge from? Well, the master has whatever latest changes
- were committed (or pushed changes, if this is a bare remote),

View file

@ -46,6 +46,7 @@ remote = specialRemoteType $ RemoteType
, setup = adbSetup
, exportSupported = exportIsSupported
, importSupported = importIsSupported
, thirdPartyPopulated = False
}
androiddirectoryField :: RemoteConfigField

View file

@ -49,6 +49,7 @@ remote = RemoteType
, setup = error "not supported"
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
-- There is only one bittorrent remote, and it always exists.

View file

@ -50,6 +50,7 @@ remote = specialRemoteType $ RemoteType
, setup = bupSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
buprepoField :: RemoteConfigField

View file

@ -45,6 +45,7 @@ remote = specialRemoteType $ RemoteType
, setup = ddarSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
ddarrepoField :: RemoteConfigField

View file

@ -56,6 +56,7 @@ remote = specialRemoteType $ RemoteType
, setup = directorySetup
, exportSupported = exportIsSupported
, importSupported = importIsSupported
, thirdPartyPopulated = False
}
directoryField :: RemoteConfigField
@ -369,13 +370,13 @@ guardSameContentIdentifiers cont old new
| new == Just old = cont
| otherwise = giveup "file content has changed"
importKeyM :: RawFilePath -> ExportLocation -> ContentIdentifier -> MeterUpdate -> Annex Key
importKeyM :: RawFilePath -> ExportLocation -> ContentIdentifier -> MeterUpdate -> Annex (Maybe Key)
importKeyM dir loc cid p = do
backend <- chooseBackend f
k <- fst <$> genKey ks p backend
currcid <- liftIO $ mkContentIdentifier absf
=<< R.getFileStatus absf
guardSameContentIdentifiers (return k) cid currcid
guardSameContentIdentifiers (return (Just k)) cid currcid
where
f = fromExportLocation loc
absf = dir P.</> f

View file

@ -53,6 +53,7 @@ remote = specialRemoteType $ RemoteType
, setup = externalSetup
, exportSupported = checkExportSupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
externaltypeField :: RemoteConfigField

View file

@ -78,6 +78,7 @@ remote = specialRemoteType $ RemoteType
, setup = gCryptSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
gitRepoField :: RemoteConfigField

View file

@ -87,6 +87,7 @@ remote = RemoteType
, setup = gitSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
locationField :: RemoteConfigField

View file

@ -74,6 +74,7 @@ remote = specialRemoteType $ RemoteType
, setup = mySetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
urlField :: RemoteConfigField

View file

@ -48,6 +48,7 @@ remote = specialRemoteType $ RemoteType
, setup = glacierSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
datacenterField :: RemoteConfigField

View file

@ -72,7 +72,7 @@ importIsSupported :: ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool
importIsSupported = \_ _ -> return True
-- | Prevent or allow exporttree=yes and importtree=yes when
-- setting up a new remote, depending on exportSupported and importSupported.
-- setting up a new remote, depending on the remote's capabilities.
adjustExportImportRemoteType :: RemoteType -> RemoteType
adjustExportImportRemoteType rt = rt { setup = setup' }
where
@ -80,7 +80,7 @@ adjustExportImportRemoteType rt = rt { setup = setup' }
pc <- either giveup return . parseRemoteConfig c
=<< configParser rt c
let checkconfig supported configured configfield cont =
ifM (supported rt pc gc)
ifM (supported rt pc gc <&&> pure (not (thirdPartyPopulated rt)))
( case st of
Init
| configured pc && encryptionIsEnabled pc ->
@ -102,35 +102,40 @@ adjustExportImportRemoteType rt = rt { setup = setup' }
-- | Adjust a remote to support exporttree=yes and/or importree=yes.
adjustExportImport :: Remote -> RemoteStateHandle -> Annex Remote
adjustExportImport r rs = do
isexporttree <- pure (exportTree (config r)) <&&> isexporttreeSupported r
isimporttree <- pure (importTree (config r)) <&&> isimporttreeSupported r
isexport <- pure (exportTree (config r))
<&&> isExportSupported r
-- When thirdPartyPopulated is True, the remote
-- does not need to be configured with importTree to support
-- imports.
isimport <- pure (importTree (config r) || not (thirdPartyPopulated (remotetype r)))
<&&> isImportSupported r
let r' = r
{ remotetype = (remotetype r)
{ exportSupported = if isexporttree
{ exportSupported = if isexport
then exportSupported (remotetype r)
else exportUnsupported
, importSupported = if isimporttree
, importSupported = if isimport
then importSupported (remotetype r)
else importUnsupported
}
}
if not isexporttree && not isimporttree
if not isexport && not isimport
then return r'
else adjustExportImport' isexporttree isimporttree r' rs
else adjustExportImport' isexport isimport r' rs
adjustExportImport' :: Bool -> Bool -> Remote -> RemoteStateHandle -> Annex Remote
adjustExportImport' isexporttree isimporttree r rs = do
adjustExportImport' isexport isimport r rs = do
dbv <- prepdbv
ciddbv <- prepciddb
let normal = not isexporttree && not isimporttree
let normal = not isexport && not isimport
let iskeyvaluestore = normal || appendonly r
return $ r
{ exportActions = if isexporttree
then if isimporttree
{ exportActions = if isexport
then if isimport
then exportActionsForImport dbv ciddbv (exportActions r)
else exportActions r
else exportUnsupported
, importActions = if isimporttree
, importActions = if isimport
then importActions r
else importUnsupported
, storeKey = \k af p ->
@ -139,11 +144,13 @@ adjustExportImport' isexporttree isimporttree r rs = do
-- when another repository has already stored the
-- key, and the local repository does not know
-- about it. To avoid unnecessary costs, don't do it.
if isexporttree
then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
else if isimporttree
then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it"
else storeKey r k af p
if mergeable
then if isexport
then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
else if isimport
then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it"
else storeKey r k af p
else storeKey r k af p
, removeKey = \k ->
-- Removing a key from an export would need to
-- change the tree in the export log to not include
@ -151,19 +158,21 @@ adjustExportImport' isexporttree isimporttree r rs = do
-- files would not be dealt with correctly.
-- There does not seem to be a good use case for
-- removing a key from an export in any case.
if isexporttree
then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
else if isimporttree
then giveup "dropping content from this remote is not supported because it is configured with importtree=yes"
else removeKey r k
, lockContent = if iskeyvaluestore
if mergeable
then if isexport
then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
else if isimport
then giveup "dropping content from this remote is not supported because it is configured with importtree=yes"
else removeKey r k
else removeKey r k
, lockContent = if iskeyvaluestore || not mergeable
then lockContent r
else Nothing
, retrieveKeyFile = \k af dest p ->
if isimporttree
if isimport
then supportappendonlyretrieve k af dest p $
retrieveKeyFileFromImport dbv ciddbv k af dest p
else if isexporttree
else if isexport
then supportappendonlyretrieve k af dest p $
retrieveKeyFileFromExport dbv k af dest p
else retrieveKeyFile r k af dest p
@ -172,16 +181,19 @@ adjustExportImport' isexporttree isimporttree r rs = do
else Nothing
, checkPresent = \k -> if appendonly r
then checkPresent r k
else if isimporttree
else if isimport
then anyM (checkPresentImport ciddbv k)
=<< getexportlocs dbv k
else if isexporttree
else if isexport
-- Check if any of the files a key
-- was exported to are present. This
-- doesn't guarantee the export
-- contains the right content,
-- which is why export remotes
-- are untrusted.
-- if the remote is an export,
-- or if something else can write
-- to it. Remotes that have such
-- problems are made untrusted,
-- so it's not worried about here.
then anyM (checkPresentExport (exportActions r) k)
=<< getexportlocs dbv k
else checkPresent r k
@ -201,17 +213,23 @@ adjustExportImport' isexporttree isimporttree r rs = do
else return Nothing
, getInfo = do
is <- getInfo r
is' <- if isexporttree
is' <- if isexport && not mergeable
then do
ts <- map fromRef . exportedTreeishes
<$> getExport (uuid r)
return (is++[("exporttree", "yes"), ("exportedtree", unwords ts)])
else return is
return $ if isimporttree
return $ if isimport && not mergeable
then (is'++[("importtree", "yes")])
else is'
}
where
-- When a remote is populated by a third party, a tree can be
-- imported from it, but that tree is not mergeable into the
-- user's own git branch. But annex objects found in the tree
-- (identified by importKey) can still be retrieved from the remote.
mergeable = thirdPartyPopulated (remotetype r)
-- exportActions adjusted to use the equivilant import actions,
-- which take ContentIdentifiers into account.
exportActionsForImport dbv ciddbv ea = ea

View file

@ -40,6 +40,7 @@ remote = specialRemoteType $ RemoteType
, setup = hookSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
hooktypeField :: RemoteConfigField

View file

@ -41,6 +41,7 @@ remote = RemoteType
, setup = httpAlsoSetup
, exportSupported = exportIsSupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
urlField :: RemoteConfigField

View file

@ -41,6 +41,7 @@ remote = RemoteType
, setup = error "P2P remotes are set up using git-annex p2p"
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
chainGen :: P2PAddress -> Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)

View file

@ -60,6 +60,7 @@ remote = specialRemoteType $ RemoteType
, setup = rsyncSetup
, exportSupported = exportIsSupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
shellEscapeField :: RemoteConfigField

View file

@ -118,6 +118,7 @@ remote = specialRemoteType $ RemoteType
, setup = s3Setup
, exportSupported = exportIsSupported
, importSupported = importIsSupported
, thirdPartyPopulated = False
}
bucketField :: RemoteConfigField

View file

@ -67,6 +67,7 @@ remote = specialRemoteType $ RemoteType
, setup = tahoeSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
scsField :: RemoteConfigField

View file

@ -32,6 +32,7 @@ remote = RemoteType
, setup = error "not supported"
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
-- There is only one web remote, and it always exists.

View file

@ -57,6 +57,7 @@ remote = specialRemoteType $ RemoteType
, setup = webdavSetup
, exportSupported = exportIsSupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
urlField :: RemoteConfigField

View file

@ -63,10 +63,15 @@ data RemoteTypeA a = RemoteType
, configParser :: RemoteConfig -> a RemoteConfigParser
-- initializes or enables a remote
, setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID)
-- check if a remote of this type is able to support export of trees
-- check if a remote of this type is able to support export
, exportSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool
-- check if a remote of this type is able to support import of trees
-- check if a remote of this type is able to support import
, importSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool
-- is a remote of this type not a usual key/value store,
-- or export/import of a tree of files, but instead a collection
-- of files, populated by something outside git-annex, some of
-- which may be annex objects?
, thirdPartyPopulated :: Bool
}
instance Eq (RemoteTypeA a) where
@ -113,9 +118,9 @@ data RemoteA a = Remote
-- Some remotes can checkPresent without an expensive network
-- operation.
, checkPresentCheap :: Bool
-- Some remotes support export of trees of files.
-- Some remotes support export.
, exportActions :: ExportActions a
-- Some remotes support import of trees of files.
-- Some remotes support import.
, importActions :: ImportActions a
-- Some remotes can provide additional details for whereis.
, whereisKey :: Maybe (Key -> a [String])
@ -288,8 +293,13 @@ data ImportActions a = ImportActions
-- bearing in mind that the file on the remote may have changed
-- since the ContentIdentifier was generated.
--
-- Throws exception on failure.
, importKey :: Maybe (ImportLocation -> ContentIdentifier -> MeterUpdate -> a Key)
-- When the remote is thirdPartyPopulated, this should check if the
-- file stored on the remote is the content of an annex object,
-- and return its Key, or Nothing if it is not. Should not
-- otherwise return Nothing.
--
-- Throws exception on failure to access the remote.
, importKey :: Maybe (ImportLocation -> ContentIdentifier -> MeterUpdate -> a (Maybe Key))
-- Retrieves a file from the remote. Ensures that the file
-- it retrieves has the requested ContentIdentifier.
--