Merge branch 'borg'

Joey Hess 2020-12-22 16:19:32 -04:00
commit 310d3c3823
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
43 changed files with 782 additions and 105 deletions

View file

@ -11,7 +11,7 @@ import Annex.Common
import qualified Annex
import Logs.Trust
import Annex.NumCopies
import Types.Remote (uuid, appendonly, config)
import Types.Remote (uuid, appendonly, config, remotetype, thirdPartyPopulated)
import qualified Remote
import qualified Command.Drop
import Command
@ -88,6 +88,7 @@ handleDropsFrom locs rs reason fromhere key afile si preverified runner = do
| appendonly r = go fs rest n
| exportTree (config r) = go fs rest n
| importTree (config r) = go fs rest n
| thirdPartyPopulated (remotetype r) = go fs rest n
| checkcopies n (Just $ Remote.uuid r) =
dropr fs r n >>= go fs rest
| otherwise = pure n

View file

@ -12,6 +12,7 @@ module Annex.Import (
ImportCommitConfig(..),
buildImportCommit,
buildImportTrees,
recordImportTree,
canImportKeys,
importKeys,
makeImportMatcher,
@ -104,6 +105,28 @@ buildImportCommit remote importtreeconfig importcommitconfig importable =
Just trackingcommit -> inRepo (Git.Ref.tree trackingcommit) >>= \case
Nothing -> go Nothing
Just _ -> go (Just trackingcommit)
where
go trackingcommit = do
(imported, updatestate) <- recordImportTree remote importtreeconfig importable
buildImportCommit' remote importcommitconfig trackingcommit imported >>= \case
Just finalcommit -> do
updatestate
return (Just finalcommit)
Nothing -> return Nothing
{- Builds a tree for an import from a special remote.
-
- Also returns an action that can be used to update
- all the other state to record the import.
-}
recordImportTree
:: Remote
-> ImportTreeConfig
-> ImportableContents (Either Sha Key)
-> Annex (History Sha, Annex ())
recordImportTree remote importtreeconfig importable = do
imported@(History finaltree _) <- buildImportTrees basetree subdir importable
return (imported, updatestate finaltree)
where
basetree = case importtreeconfig of
ImportTree -> emptyTree
@ -112,21 +135,12 @@ buildImportCommit remote importtreeconfig importcommitconfig importable =
ImportTree -> Nothing
ImportSubTree dir _ -> Just dir
go trackingcommit = do
imported@(History finaltree _) <-
buildImportTrees basetree subdir importable
buildImportCommit' remote importcommitconfig trackingcommit imported >>= \case
Just finalcommit -> do
updatestate finaltree
return (Just finalcommit)
Nothing -> return Nothing
updatestate committedtree = do
updatestate finaltree = do
importedtree <- case subdir of
Nothing -> pure committedtree
Nothing -> pure finaltree
Just dir ->
let subtreeref = Ref $
fromRef' committedtree
fromRef' finaltree
<> ":"
<> getTopFilePath dir
in fromMaybe emptyTree
@ -308,9 +322,10 @@ importKeys
:: Remote
-> ImportTreeConfig
-> Bool
-> Bool
-> ImportableContents (ContentIdentifier, ByteSize)
-> Annex (Maybe (ImportableContents (Either Sha Key)))
importKeys remote importtreeconfig importcontent importablecontents = do
importKeys remote importtreeconfig importcontent thirdpartypopulated importablecontents = do
unless (canImportKeys remote importcontent) $
giveup "This remote does not support importing without downloading content."
-- This map is used to remember content identifiers that
@ -332,7 +347,9 @@ importKeys remote importtreeconfig importcontent importablecontents = do
go oldversion cidmap importing (ImportableContents l h) db = do
largematcher <- largeFilesMatcher
jobs <- forM l $ \i ->
startimport cidmap importing db i oldversion largematcher
if thirdpartypopulated
then thirdpartypopulatedimport cidmap db i
else startimport cidmap importing db i oldversion largematcher
l' <- liftIO $ forM jobs $
either pure (atomically . takeTMVar)
if any isNothing l'
@ -391,6 +408,20 @@ importKeys remote importtreeconfig importcontent importablecontents = do
importaction
return (Right job)
thirdpartypopulatedimport cidmap db (loc, (cid, sz)) =
case Remote.importKey ia of
Nothing -> return $ Left Nothing
Just importkey ->
tryNonAsync (importkey loc cid sz nullMeterUpdate) >>= \case
Right (Just k) -> do
recordcidkey cidmap db cid k
logChange k (Remote.uuid remote) InfoPresent
return $ Left $ Just (loc, Right k)
Right Nothing -> return $ Left Nothing
Left e -> do
warning (show e)
return $ Left Nothing
importordownload cidmap db (loc, (cid, sz)) largematcher= do
f <- locworktreefile loc
matcher <- largematcher f
@ -433,25 +464,22 @@ importKeys remote importtreeconfig importcontent importablecontents = do
return Nothing
where
importer = do
unsizedk <- importkey loc cid
-- Don't display progress when generating
-- key, if the content will later be
-- downloaded, which is a more expensive
-- operation generally.
(if importcontent then nullMeterUpdate else p)
-- This avoids every remote needing
-- to add the size.
let k = alterKey unsizedk $ \kd -> kd
{ keySize = keySize kd <|> Just sz }
checkSecureHashes k >>= \case
Nothing -> do
recordcidkey cidmap db cid k
logChange k (Remote.uuid remote) InfoPresent
if importcontent
then getcontent k
else return (Just (k, True))
Just msg -> giveup (msg ++ " to import")
-- Don't display progress when generating
-- key, if the content will later be
-- downloaded, which is a more expensive
-- operation generally.
let p' = if importcontent then nullMeterUpdate else p
importkey loc cid sz p' >>= \case
Nothing -> return Nothing
Just k -> checkSecureHashes k >>= \case
Nothing -> do
recordcidkey cidmap db cid k
logChange k (Remote.uuid remote) InfoPresent
if importcontent
then getcontent k
else return (Just (k, True))
Just msg -> giveup (msg ++ " to import")
getcontent :: Key -> Annex (Maybe (Key, Bool))
getcontent k = do
let af = AssociatedFile (Just f)
@ -630,14 +658,17 @@ makeImportMatcher r = load preferredContentKeylessTokens >>= \case
- regardless. (Similar to how git add behaves on gitignored files.)
- This avoids creating a remote tracking branch that, when merged,
- would delete the files.
-
- Throws exception if unable to contact the remote.
- Returns Nothing when there is no change since last time.
-}
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
getImportableContents r importtreeconfig ci matcher =
getImportableContents r importtreeconfig ci matcher = do
Remote.listImportableContents (Remote.importActions r) >>= \case
Nothing -> return Nothing
Just importable -> do
dbhandle <- Export.openDb (Remote.uuid r)
Just <$> filterunwanted dbhandle importable
Nothing -> return Nothing
where
filterunwanted dbhandle ic = ImportableContents
<$> filterM (wanted dbhandle) (importableContents ic)

View file

@ -57,7 +57,8 @@ calcSyncRemotes = do
contentremotes <- filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . Remote.gitconfig) $
filter (\r -> Remote.uuid r /= NoUUID) syncable
let (exportremotes, nonexportremotes) = partition (exportTree . Remote.config) contentremotes
let dataremotes = filter (not . importTree . Remote.config) nonexportremotes
let isimport r = importTree (Remote.config r) || Remote.thirdPartyPopulated (Remote.remotetype r)
let dataremotes = filter (not . isimport) nonexportremotes
return $ \dstatus -> dstatus
{ syncRemotes = syncable

View file

@ -306,7 +306,7 @@ seekRemote remote branch msubdir importcontent ci = do
void $ includeCommandAction (listContents remote importtreeconfig ci importabletvar)
liftIO (atomically (readTVar importabletvar)) >>= \case
Nothing -> return ()
Just importable -> importKeys remote importtreeconfig importcontent importable >>= \case
Just importable -> importKeys remote importtreeconfig importcontent False importable >>= \case
Nothing -> warning $ concat
[ "Failed to import some files from "
, Remote.name remote
@ -324,21 +324,25 @@ seekRemote remote branch msubdir importcontent ci = do
listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize))) -> CommandStart
listContents remote importtreeconfig ci tvar = starting "list" ai si $
listContents' remote importtreeconfig ci $ \importable -> do
liftIO $ atomically $ writeTVar tvar importable
next $ return True
where
ai = ActionItemOther (Just (Remote.name remote))
si = SeekInput []
listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a
listContents' remote importtreeconfig ci a =
makeImportMatcher remote >>= \case
Right matcher -> getImportableContents remote importtreeconfig ci matcher >>= \case
Just importable -> next $ do
liftIO $ atomically $ writeTVar tvar (Just importable)
return True
Nothing -> giveup $ "Unable to list contents of " ++ Remote.name remote
Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case
Right importable -> a importable
Left e -> giveup $ "Unable to list contents of " ++ Remote.name remote ++ ": " ++ show e
Left err -> giveup $ unwords
[ "Cannot import from"
, Remote.name remote
, "because of a problem with its configuration:"
, err
]
where
ai = ActionItemOther (Just (Remote.name remote))
si = SeekInput []
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents (Either Sha Key) -> CommandStart
commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig importable =

View file

@ -67,7 +67,7 @@ import Annex.UpdateInstead
import Annex.Export
import Annex.TaggedPush
import Annex.CurrentBranch
import Annex.Import (canImportKeys)
import Annex.Import
import Annex.CheckIgnore
import Types.FileMatcher
import qualified Database.Export as Export
@ -211,8 +211,9 @@ seek' o = do
dataremotes <- filter (\r -> Remote.uuid r /= NoUUID)
<$> filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . Remote.gitconfig) remotes
let (exportremotes, nonexportremotes) = partition (exportTree . Remote.config) dataremotes
let importremotes = filter (importTree . Remote.config) dataremotes
let keyvalueremotes = filter (not . importTree . Remote.config) nonexportremotes
let isimport r = importTree (Remote.config r) || Remote.thirdPartyPopulated (Remote.remotetype r)
let importremotes = filter isimport dataremotes
let keyvalueremotes = filter (not . isimport) nonexportremotes
if cleanupOption o
then do
@ -464,6 +465,9 @@ pullRemote o mergeconfig remote branch = stopUnless (pure $ pullOption o && want
importRemote :: Bool -> SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> CommandSeek
importRemote importcontent o mergeconfig remote currbranch
| not (pullOption o) || not wantpull = noop
| Remote.thirdPartyPopulated (Remote.remotetype remote) =
when (canImportKeys remote importcontent) $
importThirdPartyPopulated remote
| otherwise = case remoteAnnexTrackingBranch (Remote.gitconfig remote) of
Nothing -> noop
Just tb -> do
@ -480,6 +484,29 @@ importRemote importcontent o mergeconfig remote currbranch
where
wantpull = remoteAnnexPull (Remote.gitconfig remote)
{- Import from a remote that is populated by a third party, by listing
- the contents of the remote, and then adding to a tree only the files
- on it that importKey identifies. The tree is only used to keep track
- of where keys are located on the remote; no remote tracking branch is
- updated, because the filenames are the names of annex object files,
- not suitable for a tracking branch. Does not transfer any content. -}
importThirdPartyPopulated :: Remote -> CommandSeek
importThirdPartyPopulated remote =
void $ includeCommandAction $ starting "list" ai si $
Command.Import.listContents' remote ImportTree (CheckGitIgnore False) go
where
go (Just importable) = importKeys remote ImportTree False True importable >>= \case
Just importablekeys -> do
(_imported, updatestate) <- recordImportTree remote ImportTree importablekeys
next $ do
updatestate
return True
Nothing -> next $ return False
go Nothing = next $ return True -- unchanged from before
ai = ActionItemOther (Just (Remote.name remote))
si = SeekInput []
{- The remote probably has both a master and a synced/master branch.
- Which to merge from? Well, the master has whatever latest changes
- were committed (or pushed changes, if this is a bare remote),

View file

@ -1,17 +1,17 @@
{- git ls-tree interface
-
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
module Git.LsTree (
TreeItem(..),
LsTreeMode(..),
lsTree,
lsTree',
lsTreeStrict,
lsTreeStrict',
lsTreeParams,
lsTreeFiles,
parseLsTree,
@ -30,6 +30,7 @@ import Data.Either
import System.Posix.Types
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Attoparsec.ByteString as AS
import qualified Data.Attoparsec.ByteString.Lazy as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
@ -38,7 +39,7 @@ data TreeItem = TreeItem
, typeobj :: S.ByteString
, sha :: Ref
, file :: TopFilePath
} deriving Show
} deriving (Show)
data LsTreeMode = LsTreeRecursive | LsTreeNonRecursive
@ -51,6 +52,13 @@ lsTree' ps lsmode t repo = do
(l, cleanup) <- pipeNullSplit (lsTreeParams lsmode t ps) repo
return (rights (map parseLsTree l), cleanup)
lsTreeStrict :: LsTreeMode -> Ref -> Repo -> IO [TreeItem]
lsTreeStrict = lsTreeStrict' []
lsTreeStrict' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO [TreeItem]
lsTreeStrict' ps lsmode t repo = rights . map parseLsTreeStrict
<$> pipeNullSplitStrict (lsTreeParams lsmode t ps) repo
lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam]
lsTreeParams lsmode r ps =
[ Param "ls-tree"
@ -83,6 +91,13 @@ parseLsTree b = case A.parse parserLsTree b of
A.Done _ r -> Right r
A.Fail _ _ err -> Left err
parseLsTreeStrict :: S.ByteString -> Either String TreeItem
parseLsTreeStrict b = go (AS.parse parserLsTree b)
where
go (AS.Done _ r) = Right r
go (AS.Fail _ _ err) = Left err
go (AS.Partial c) = go (c mempty)
{- Parses a line of ls-tree output, in format:
- mode SP type SP sha TAB file
-

View file

@ -135,7 +135,12 @@ fmtObjectType CommitObject = "commit"
fmtObjectType TreeObject = "tree"
{- Types of items in a tree. -}
data TreeItemType = TreeFile | TreeExecutable | TreeSymlink | TreeSubmodule
data TreeItemType
= TreeFile
| TreeExecutable
| TreeSymlink
| TreeSubmodule
| TreeSubtree
deriving (Eq, Show)
{- Git uses magic numbers to denote the type of a tree item. -}
@ -144,6 +149,7 @@ readTreeItemType "100644" = Just TreeFile
readTreeItemType "100755" = Just TreeExecutable
readTreeItemType "120000" = Just TreeSymlink
readTreeItemType "160000" = Just TreeSubmodule
readTreeItemType "040000" = Just TreeSubtree
readTreeItemType _ = Nothing
fmtTreeItemType :: TreeItemType -> S.ByteString
@ -151,12 +157,14 @@ fmtTreeItemType TreeFile = "100644"
fmtTreeItemType TreeExecutable = "100755"
fmtTreeItemType TreeSymlink = "120000"
fmtTreeItemType TreeSubmodule = "160000"
fmtTreeItemType TreeSubtree = "040000"
toTreeItemType :: FileMode -> Maybe TreeItemType
toTreeItemType 0o100644 = Just TreeFile
toTreeItemType 0o100755 = Just TreeExecutable
toTreeItemType 0o120000 = Just TreeSymlink
toTreeItemType 0o160000 = Just TreeSubmodule
toTreeItemType 0o040000 = Just TreeSubtree
toTreeItemType _ = Nothing
fromTreeItemType :: TreeItemType -> FileMode
@ -164,6 +172,7 @@ fromTreeItemType TreeFile = 0o100644
fromTreeItemType TreeExecutable = 0o100755
fromTreeItemType TreeSymlink = 0o120000
fromTreeItemType TreeSubmodule = 0o160000
fromTreeItemType TreeSubtree = 0o040000
data Commit = Commit
{ commitTree :: Sha

View file

@ -32,12 +32,16 @@ recordContentIdentifier :: RemoteStateHandle -> ContentIdentifier -> Key -> Anne
recordContentIdentifier (RemoteStateHandle u) cid k = do
c <- liftIO currentVectorClock
config <- Annex.getGitConfig
Annex.Branch.change (remoteContentIdentifierLogFile config k) $
buildLog . addcid c . parseLog
Annex.Branch.maybeChange (remoteContentIdentifierLogFile config k) $
addcid c . parseLog
where
addcid c l = changeMapLog c u (cid :| contentIdentifierList (M.lookup u m)) l
addcid c v
| cid `elem` l = Nothing -- no change needed
| otherwise = Just $ buildLog $
changeMapLog c u (cid :| l) v
where
m = simpleMap l
m = simpleMap v
l = contentIdentifierList (M.lookup u m)
-- | Get all known content identifiers for a key.
getContentIdentifiers :: Key -> Annex [(RemoteStateHandle, [ContentIdentifier])]

View file

@ -1,4 +1,4 @@
{- git-annex export log
{- git-annex export log (also used to log imports)
-
- Copyright 2017-2019 Joey Hess <id@joeyh.name>
-
@ -64,7 +64,6 @@ exportedTreeishes = nub . map exportedTreeish
incompleteExportedTreeishes :: [Exported] -> [Git.Ref]
incompleteExportedTreeishes = concatMap incompleteExportedTreeish
data ExportParticipants = ExportParticipants
{ exportFrom :: UUID
, exportTo :: UUID

View file

@ -46,6 +46,7 @@ remote = specialRemoteType $ RemoteType
, setup = adbSetup
, exportSupported = exportIsSupported
, importSupported = importIsSupported
, thirdPartyPopulated = False
}
androiddirectoryField :: RemoteConfigField
@ -286,8 +287,11 @@ renameExportM serial adir _k old new = do
]
listImportableContentsM :: AndroidSerial -> AndroidPath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsM serial adir =
process <$> adbShell serial
listImportableContentsM serial adir = adbfind >>= \case
Just ls -> return $ Just $ ImportableContents (mapMaybe mk ls) []
Nothing -> giveup "adb find failed"
where
adbfind = adbShell serial
[ Param "find"
-- trailing slash is needed, or android's find command
-- won't recurse into the directory
@ -297,9 +301,6 @@ listImportableContentsM serial adir =
, Param "-c", Param statformat
, Param "{}", Param "+"
]
where
process Nothing = Nothing
process (Just ls) = Just $ ImportableContents (mapMaybe mk ls) []
statformat = adbStatFormat ++ "\t%n"

View file

@ -49,6 +49,7 @@ remote = RemoteType
, setup = error "not supported"
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
-- There is only one bittorrent remote, and it always exists.

Remote/Borg.hs Normal file
View file

@ -0,0 +1,318 @@
{- Using borg as a remote.
-
- Copyright 2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Remote.Borg (remote) where
import Annex.Common
import Types.Remote
import Types.Creds
import Types.Import
import qualified Git
import qualified Git.LsTree as LsTree
import Git.Types (toTreeItemType, TreeItemType(..))
import Git.FilePath
import Config
import Config.Cost
import Annex.Tmp
import Annex.SpecialRemote.Config
import Remote.Helper.Special
import Remote.Helper.ExportImport
import Annex.UUID
import Types.ProposedAccepted
import Utility.Metered
import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated
import Logs.Export
import Data.Either
import Text.Read
import Control.Exception (evaluate)
import Control.DeepSeq
import qualified Data.Map as M
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified System.FilePath.ByteString as P
type BorgRepo = String
type BorgArchiveName = S.ByteString
remote :: RemoteType
remote = RemoteType
{ typename = "borg"
, enumerate = const (findSpecialRemotes "borgrepo")
, generate = gen
, configParser = mkRemoteConfigParser
[ optionalStringParser borgrepoField
(FieldDesc "(required) borg repository to use")
]
, setup = borgSetup
, exportSupported = exportUnsupported
, importSupported = importIsSupported
, thirdPartyPopulated = True
}
borgrepoField :: RemoteConfigField
borgrepoField = Accepted "borgrepo"
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r u rc gc rs = do
c <- parsedRemoteConfig remote rc
cst <- remoteCost gc $
if borgLocal borgrepo
then nearlyCheapRemoteCost
else expensiveRemoteCost
return $ Just $ Remote
{ uuid = u
, cost = cst
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retrieveKeyFileDummy
, retrieveKeyFileCheap = Nothing
-- Borg cryptographically verifies content.
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = removeKeyDummy
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = borgLocal borgrepo
, exportActions = exportUnsupported
, importActions = ImportActions
{ listImportableContents = listImportableContentsM u borgrepo
, importKey = Just ThirdPartyPopulated.importKey
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM borgrepo
, checkPresentExportWithContentIdentifier = checkPresentExportWithContentIdentifierM borgrepo
-- This remote is thirdPartyPopulated, so these
-- actions will never be used.
, storeExportWithContentIdentifier = storeExportWithContentIdentifier importUnsupported
, removeExportDirectoryWhenEmpty = removeExportDirectoryWhenEmpty importUnsupported
, removeExportWithContentIdentifier = removeExportWithContentIdentifier importUnsupported
}
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
, config = c
, getRepo = return r
, gitconfig = gc
, localpath = if borgLocal borgrepo && not (null borgrepo)
then Just borgrepo
else Nothing
, remotetype = remote
, availability = if borgLocal borgrepo then LocallyAvailable else GloballyAvailable
, readonly = False
, appendonly = False
, mkUnavailable = return Nothing
, getInfo = return [("repo", borgrepo)]
, claimUrl = Nothing
, checkUrl = Nothing
, remoteStateHandle = rs
}
where
borgrepo = fromMaybe (giveup "missing borgrepo") $ remoteAnnexBorgRepo gc
borgSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
borgSetup _ mu _ c _gc = do
u <- maybe (liftIO genUUID) return mu
-- verify configuration is sane
let borgrepo = maybe (giveup "Specify borgrepo=") fromProposedAccepted $
M.lookup borgrepoField c
-- The borgrepo is stored in git config, as well as this repo's
-- persistent state, so it can vary between hosts.
gitConfigSpecialRemote u c [("borgrepo", borgrepo)]
return (c, u)
borgLocal :: BorgRepo -> Bool
borgLocal = notElem ':'
borgArchive :: BorgRepo -> BorgArchiveName -> String
borgArchive r n = r ++ "::" ++ decodeBS' n
listImportableContentsM :: UUID -> BorgRepo -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsM u borgrepo = prompt $ do
imported <- getImported u
ls <- withborglist borgrepo "{barchive}{NUL}" $ \as ->
forM as $ \archivename ->
case M.lookup archivename imported of
Just getfast -> return $ Left (archivename, getfast)
Nothing -> Right <$>
let archive = borgArchive borgrepo archivename
in withborglist archive "{size}{NUL}{path}{NUL}" $
liftIO . evaluate . force . parsefilelist archivename
if all isLeft ls && M.null (M.difference imported (M.fromList (lefts ls)))
then return Nothing -- unchanged since last time, avoid work
else Just . mkimportablecontents <$> mapM (either snd pure) ls
where
withborglist what format a = do
let p = (proc "borg" ["list", what, "--format", format])
{ std_out = CreatePipe }
(Nothing, Just h, Nothing, pid) <- liftIO $ createProcess p
l <- liftIO $ map L.toStrict
. filter (not . L.null)
. L.split 0
<$> L.hGetContents h
let cleanup = liftIO $ do
hClose h
forceSuccessProcess p pid
a l `finally` cleanup
parsefilelist archivename (bsz:f:rest) = case readMaybe (fromRawFilePath bsz) of
Nothing -> parsefilelist archivename rest
Just sz ->
let loc = genImportLocation archivename f
-- This does a little unnecessary work to parse the
-- key, which is then thrown away. But, it lets the
-- file list be shrunk down to only the ones that are
-- importable keys, so avoids needing to buffer all
-- the rest of the files in memory.
in case ThirdPartyPopulated.importKey' loc sz of
Just _k -> (loc, (borgContentIdentifier, sz))
: parsefilelist archivename rest
Nothing -> parsefilelist archivename rest
parsefilelist _ _ = []
-- importableHistory is not used for retrieval, so is not
-- populated with old archives. Instead, a tree of archives
-- is constructed, by genImportLocation including the archive
-- name in the ImportLocation.
mkimportablecontents l = ImportableContents
{ importableContents = concat l
, importableHistory = []
}
-- We do not need a ContentIdentifier in order to retrieve a file from
-- borg; the ImportLocation contains all that's needed. So, this is left
-- empty.
borgContentIdentifier :: ContentIdentifier
borgContentIdentifier = ContentIdentifier mempty
-- Borg does not allow / in the name of an archive, so the archive
-- name will always be the first directory in the ImportLocation.
--
-- Paths in a borg archive are always relative, not absolute, so the use of
-- </> to combine the archive name with the path will always work.
genImportLocation :: BorgArchiveName -> RawFilePath -> ImportLocation
genImportLocation archivename p =
ThirdPartyPopulated.mkThirdPartyImportLocation $
archivename P.</> p
extractImportLocation :: ImportLocation -> (BorgArchiveName, RawFilePath)
extractImportLocation loc = go $ P.splitDirectories $
ThirdPartyPopulated.fromThirdPartyImportLocation loc
where
go (archivename:rest) = (archivename, P.joinPath rest)
go _ = giveup $ "Unable to parse import location " ++ fromRawFilePath (fromImportLocation loc)
-- Since the ImportLocation starts with the archive name, a list of all
-- archive names we've already imported can be found by just listing the
-- last imported tree. And the contents of those archives can be retrieved
-- by listing the subtree recursively, which will likely be quite a lot
-- faster than running borg.
getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(ImportLocation, (ContentIdentifier, ByteSize))]))
getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u)
where
go t = M.fromList . mapMaybe mk
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeNonRecursive t)
mk ti
| toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just
( getTopFilePath (LsTree.file ti)
, getcontents
(getTopFilePath (LsTree.file ti))
(LsTree.sha ti)
)
| otherwise = Nothing
getcontents archivename t = mapMaybe (mkcontents archivename)
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive t)
mkcontents archivename ti = do
let f = ThirdPartyPopulated.fromThirdPartyImportLocation $
mkImportLocation $ getTopFilePath $ LsTree.file ti
k <- deserializeKey' (P.takeFileName f)
return
( genImportLocation archivename f
,
( borgContentIdentifier
-- defaulting to 0 size is ok, this size
-- only gets used by
-- ThirdPartyPopulated.importKey,
-- which ignores the size when the key
-- does not have a size.
, fromMaybe 0 (fromKey keySize k)
)
)
-- Check if the file is still there in the borg archive.
-- Does not check that the content is unchanged; we assume that
-- the content of files in borg archives does not change, which is normally
-- the case. But archives may be deleted, and files may be deleted.
checkPresentExportWithContentIdentifierM :: BorgRepo -> Key -> ImportLocation -> [ContentIdentifier] -> Annex Bool
checkPresentExportWithContentIdentifierM borgrepo _ loc _ = prompt $ liftIO $ do
let p = proc "borg"
[ "list"
, "--format"
, "1"
, borgArchive borgrepo archivename
, fromRawFilePath archivefile
]
-- borg list exits nonzero with an error message if an archive
-- no longer exists. But, the user can delete archives at any
-- time they want. So, hide errors, and if it exits nonzero,
-- check if the borg repository still exists, and only throw an
-- exception if not.
(Nothing, Just h, Nothing, pid) <- withNullHandle $ \nullh ->
createProcess $ p
{ std_out = CreatePipe
, std_err = UseHandle nullh
}
ok <- (== "1") <$> hGetContentsStrict h
hClose h
ifM (checkSuccessProcess pid)
( return ok
, checkrepoexists
)
where
(archivename, archivefile) = extractImportLocation loc
checkrepoexists = do
let p = proc "borg"
[ "list"
, "--format"
, "1"
, borgrepo
]
(Nothing, Nothing, Nothing, pid) <- withNullHandle $ \nullh ->
createProcess $ p
{ std_out = UseHandle nullh }
ifM (checkSuccessProcess pid)
( return False -- repo exists, content not in it
, giveup $ "Unable to access borg repository " ++ borgrepo
)
retrieveExportWithContentIdentifierM :: BorgRepo -> ImportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
retrieveExportWithContentIdentifierM borgrepo loc _ dest mkk _ = do
showOutput
prompt $ withOtherTmp $ \othertmp -> liftIO $ do
-- borgrepo could be relative, and borg has to be run
-- in the temp directory to get it to write there
absborgrepo <- fromRawFilePath <$> absPath (toRawFilePath borgrepo)
let p = proc "borg"
[ "extract"
, borgArchive absborgrepo archivename
, fromRawFilePath archivefile
]
(Nothing, Nothing, Nothing, pid) <- createProcess $ p
{ cwd = Just (fromRawFilePath othertmp) }
forceSuccessProcess p pid
-- Filepaths in borg archives are relative, so it's ok to
-- combine with </>
moveFile (fromRawFilePath othertmp </> fromRawFilePath archivefile) dest
removeDirectoryRecursive (fromRawFilePath othertmp)
mkk
where
(archivename, archivefile) = extractImportLocation loc

View file

@ -50,6 +50,7 @@ remote = specialRemoteType $ RemoteType
, setup = bupSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
buprepoField :: RemoteConfigField

View file

@ -45,6 +45,7 @@ remote = specialRemoteType $ RemoteType
, setup = ddarSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
ddarrepoField :: RemoteConfigField

View file

@ -56,6 +56,7 @@ remote = specialRemoteType $ RemoteType
, setup = directorySetup
, exportSupported = exportIsSupported
, importSupported = importIsSupported
, thirdPartyPopulated = False
}
directoryField :: RemoteConfigField
@ -337,10 +338,10 @@ removeExportLocation topdir loc =
in go (upFrom loc') =<< tryIO (removeDirectory p)
listImportableContentsM :: RawFilePath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsM dir = catchMaybeIO $ liftIO $ do
listImportableContentsM dir = liftIO $ do
l <- dirContentsRecursive (fromRawFilePath dir)
l' <- mapM (go . toRawFilePath) l
return $ ImportableContents (catMaybes l') []
return $ Just $ ImportableContents (catMaybes l') []
where
go f = do
st <- R.getFileStatus f
@ -369,13 +370,15 @@ guardSameContentIdentifiers cont old new
| new == Just old = cont
| otherwise = giveup "file content has changed"
importKeyM :: RawFilePath -> ExportLocation -> ContentIdentifier -> MeterUpdate -> Annex Key
importKeyM dir loc cid p = do
importKeyM :: RawFilePath -> ExportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> Annex (Maybe Key)
importKeyM dir loc cid sz p = do
backend <- chooseBackend f
k <- fst <$> genKey ks p backend
unsizedk <- fst <$> genKey ks p backend
let k = alterKey unsizedk $ \kd -> kd
{ keySize = keySize kd <|> Just sz }
currcid <- liftIO $ mkContentIdentifier absf
=<< R.getFileStatus absf
guardSameContentIdentifiers (return k) cid currcid
guardSameContentIdentifiers (return (Just k)) cid currcid
where
f = fromExportLocation loc
absf = dir P.</> f

View file

@ -53,6 +53,7 @@ remote = specialRemoteType $ RemoteType
, setup = externalSetup
, exportSupported = checkExportSupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
externaltypeField :: RemoteConfigField

View file

@ -78,6 +78,7 @@ remote = specialRemoteType $ RemoteType
, setup = gCryptSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
gitRepoField :: RemoteConfigField

View file

@ -87,6 +87,7 @@ remote = RemoteType
, setup = gitSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
locationField :: RemoteConfigField

View file

@ -74,6 +74,7 @@ remote = specialRemoteType $ RemoteType
, setup = mySetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
urlField :: RemoteConfigField

View file

@ -48,6 +48,7 @@ remote = specialRemoteType $ RemoteType
, setup = glacierSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
datacenterField :: RemoteConfigField

View file

@ -54,7 +54,7 @@ instance HasImportUnsupported (ParsedRemoteConfig -> RemoteGitConfig -> Annex Bo
instance HasImportUnsupported (ImportActions Annex) where
importUnsupported = ImportActions
{ listImportableContents = return Nothing
{ listImportableContents = nope
, importKey = Nothing
, retrieveExportWithContentIdentifier = nope
, storeExportWithContentIdentifier = nope
@ -72,7 +72,7 @@ importIsSupported :: ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool
importIsSupported = \_ _ -> return True
-- | Prevent or allow exporttree=yes and importtree=yes when
-- setting up a new remote, depending on exportSupported and importSupported.
-- setting up a new remote, depending on the remote's capabilities.
adjustExportImportRemoteType :: RemoteType -> RemoteType
adjustExportImportRemoteType rt = rt { setup = setup' }
where
@ -80,7 +80,7 @@ adjustExportImportRemoteType rt = rt { setup = setup' }
pc <- either giveup return . parseRemoteConfig c
=<< configParser rt c
let checkconfig supported configured configfield cont =
ifM (supported rt pc gc)
ifM (supported rt pc gc <&&> pure (not (thirdPartyPopulated rt)))
( case st of
Init
| configured pc && encryptionIsEnabled pc ->
@ -102,8 +102,13 @@ adjustExportImportRemoteType rt = rt { setup = setup' }
-- | Adjust a remote to support exporttree=yes and/or importree=yes.
adjustExportImport :: Remote -> RemoteStateHandle -> Annex Remote
adjustExportImport r rs = do
isexport <- pure (exportTree (config r)) <&&> isExportSupported r
isimport <- pure (importTree (config r)) <&&> isImportSupported r
isexport <- pure (exportTree (config r))
<&&> isExportSupported r
-- When thirdPartyPopulated is True, the remote
-- does not need to be configured with importTree to support
-- imports.
isimport <- pure (importTree (config r) || thirdPartyPopulated (remotetype r))
<&&> isImportSupported r
let r' = r
{ remotetype = (remotetype r)
{ exportSupported = if isexport
@ -139,11 +144,13 @@ adjustExportImport' isexport isimport r rs = do
-- when another repository has already stored the
-- key, and the local repository does not know
-- about it. To avoid unnecessary costs, don't do it.
if isexport
then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
else if isimport
then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it"
else storeKey r k af p
if mergeable
then if isexport
then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
else if isimport
then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it"
else storeKey r k af p
else storeKey r k af p
, removeKey = \k ->
-- Removing a key from an export would need to
-- change the tree in the export log to not include
@ -151,12 +158,14 @@ adjustExportImport' isexport isimport r rs = do
-- files would not be dealt with correctly.
-- There does not seem to be a good use case for
-- removing a key from an export in any case.
if isexport
then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
else if isimport
then giveup "dropping content from this remote is not supported because it is configured with importtree=yes"
else removeKey r k
, lockContent = if iskeyvaluestore
if mergeable
then if isexport
then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
else if isimport
then giveup "dropping content from this remote is not supported because it is configured with importtree=yes"
else removeKey r k
else removeKey r k
, lockContent = if iskeyvaluestore || not mergeable
then lockContent r
else Nothing
, retrieveKeyFile = \k af dest p ->
@ -180,8 +189,11 @@ adjustExportImport' isexport isimport r rs = do
-- was exported to are present. This
-- doesn't guarantee the export
-- contains the right content,
-- which is why export remotes
-- are untrusted.
-- if the remote is an export,
-- or if something else can write
-- to it. Remotes that have such
-- problems are made untrusted,
-- so it's not worried about here.
then anyM (checkPresentExport (exportActions r) k)
=<< getexportlocs dbv k
else checkPresent r k
@ -201,17 +213,23 @@ adjustExportImport' isexport isimport r rs = do
else return Nothing
, getInfo = do
is <- getInfo r
is' <- if isexport
is' <- if isexport && not mergeable
then do
ts <- map fromRef . exportedTreeishes
<$> getExport (uuid r)
return (is++[("export", "yes"), ("exportedtree", unwords ts)])
return (is++[("exporttree", "yes"), ("exportedtree", unwords ts)])
else return is
return $ if isimport
then (is'++[("import", "yes")])
return $ if isimport && not mergeable
then (is'++[("importtree", "yes")])
else is'
}
where
-- When a remote is populated by a third party, a tree can be
-- imported from it, but that tree is not mergeable into the
-- user's own git branch. But annex objects found in the tree
-- (identified by importKey) can still be retrieved from the remote.
mergeable = thirdPartyPopulated (remotetype r)
-- exportActions adjusted to use the equivalent import actions,
-- which take ContentIdentifiers into account.
exportActionsForImport dbv ciddbv ea = ea

View file

@ -0,0 +1,86 @@
{- Helpers for thirdPartyPopulated remotes
-
- Copyright 2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Remote.Helper.ThirdPartyPopulated where
import Annex.Common
import Types.Remote
import Types.Import
import Crypto (isEncKey)
import Utility.Metered
import qualified System.FilePath.ByteString as P
import qualified Data.ByteString as S
-- When a remote is thirdPartyPopulated, the files we want are probably
-- in the .git directory. But, git does not really support .git in paths
-- in a git tree. (Such a tree can be built, but it will lead to problems.)
-- And so anything in .git is prevented from being imported.
-- To work around that, this renames that directory when generating an
-- ImportLocation.
mkThirdPartyImportLocation :: RawFilePath -> ImportLocation
mkThirdPartyImportLocation =
mkImportLocation . P.joinPath . map esc . P.splitDirectories
where
esc ".git" = "dotgit"
esc x
| "dotgit" `S.isSuffixOf` x = "dot" <> x
| otherwise = x
fromThirdPartyImportLocation :: ImportLocation -> RawFilePath
fromThirdPartyImportLocation =
P.joinPath . map unesc . P.splitDirectories . fromImportLocation
where
unesc "dotgit" = ".git"
unesc x
| "dotgit" `S.isSuffixOf` x = S.drop 3 x
| otherwise = x
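-- A minimal sketch of how the escaping round-trips (the paths here are
-- hypothetical, purely for illustration):
--
--   mkThirdPartyImportLocation ".git/annex/objects/xx/yy/KEY/KEY"
--     yields an ImportLocation of "dotgit/annex/objects/xx/yy/KEY/KEY",
--     and fromThirdPartyImportLocation maps it back to the original path.
--
-- A path component literally named "dotgit" escapes to "dotdotgit" and
-- unescapes back, so the renaming cannot collide with a real directory
-- in the backup.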
-- When a remote is thirdPartyPopulated, and contains a backup of a
-- git-annex repository or some special remotes, this can be used to
-- find only those ImportLocations that are annex object files.
-- All other ImportLocations are ignored.
importKey :: ImportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> Annex (Maybe Key)
importKey loc _cid sz _ = return $ importKey' loc sz
importKey' :: ImportLocation -> ByteSize -> Maybe Key
importKey' loc sz = case deserializeKey' f of
Just k
-- Annex objects always are in a subdirectory with the same
-- name as the filename. If this is not the case for the file
-- that was backed up, it is probably not a valid annex object.
-- Eg, it could be something in annex/bad/, or annex/tmp/.
-- Or it could be a file that only happens to have a name
-- like an annex object.
-- (This does unfortunately prevent recognizing files that are
-- part of special remotes that don't use that layout. The most
-- likely special remote to be in a backup, the directory
-- special remote, does use that layout at least.)
| lastMaybe (P.splitDirectories (P.dropFileName p)) /= Just f -> Nothing
-- Chunked or encrypted keys used in special remotes are not
-- supported.
| isChunkKey k || isEncKey k -> Nothing
-- Check that the size of the key is the same as the size of the
-- file stored in the backup. This is a cheap way to make sure it's
-- probably the actual content of the file. We don't fully
-- verify the content here because that could be a very
-- expensive operation for a large repository; if the user
-- wants to detect every possible data corruption problem
-- (eg, wrong data read off disk during backup, or the object
-- was corrupt in the git-annex repo and that bad object got
-- backed up), they can fsck the remote.
| otherwise -> case fromKey keySize k of
Just sz'
| sz' == sz -> Just k
| otherwise -> Nothing
Nothing -> Just k
Nothing -> Nothing
where
p = fromImportLocation loc
f = P.takeFileName p
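-- For example (hypothetical key and sizes, purely for illustration):
-- a file listed at
--   .../annex/objects/xx/yy/SHA256E-s1024--abc/SHA256E-s1024--abc
-- with a listed size of 1024 is recognized as that key, while the same
-- path with a different listed size, or a file whose parent directory
-- is not named after the key, yields Nothing.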

View file

@ -40,6 +40,7 @@ remote = specialRemoteType $ RemoteType
, setup = hookSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
hooktypeField :: RemoteConfigField

View file

@ -41,6 +41,7 @@ remote = RemoteType
, setup = httpAlsoSetup
, exportSupported = exportIsSupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
urlField :: RemoteConfigField

View file

@ -36,6 +36,7 @@ import qualified Remote.Glacier
import qualified Remote.Ddar
import qualified Remote.GitLFS
import qualified Remote.HttpAlso
import qualified Remote.Borg
import qualified Remote.Hook
import qualified Remote.External
@ -57,6 +58,7 @@ remoteTypes = map adjustExportImportRemoteType
, Remote.Ddar.remote
, Remote.GitLFS.remote
, Remote.HttpAlso.remote
, Remote.Borg.remote
, Remote.Hook.remote
, Remote.External.remote
]

View file

@ -41,6 +41,7 @@ remote = RemoteType
, setup = error "P2P remotes are set up using git-annex p2p"
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
chainGen :: P2PAddress -> Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)

View file

@ -60,6 +60,7 @@ remote = specialRemoteType $ RemoteType
, setup = rsyncSetup
, exportSupported = exportIsSupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
shellEscapeField :: RemoteConfigField

View file

@ -118,6 +118,7 @@ remote = specialRemoteType $ RemoteType
, setup = s3Setup
, exportSupported = exportIsSupported
, importSupported = importIsSupported
, thirdPartyPopulated = False
}
bucketField :: RemoteConfigField
@ -552,12 +553,11 @@ renameExportS3 hv r rs info k src dest = Just <$> go
listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsS3 hv r info =
withS3Handle hv $ \case
Nothing -> do
warning $ needS3Creds (uuid r)
return Nothing
Just h -> catchMaybeIO $ liftIO $ runResourceT $
extractFromResourceT =<< startlist h
Nothing -> giveup $ needS3Creds (uuid r)
Just h -> Just <$> go h
where
go h = liftIO $ runResourceT $ extractFromResourceT =<< startlist h
startlist h
| versioning info = do
rsp <- sendS3Handle h $

View file

@ -67,6 +67,7 @@ remote = specialRemoteType $ RemoteType
, setup = tahoeSetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
scsField :: RemoteConfigField

View file

@ -32,6 +32,7 @@ remote = RemoteType
, setup = error "not supported"
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
-- There is only one web remote, and it always exists.

View file

@ -57,6 +57,7 @@ remote = specialRemoteType $ RemoteType
, setup = webdavSetup
, exportSupported = exportIsSupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
urlField :: RemoteConfigField

View file

@ -327,6 +327,7 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexGnupgDecryptOptions :: [String]
, remoteAnnexRsyncUrl :: Maybe String
, remoteAnnexBupRepo :: Maybe String
, remoteAnnexBorgRepo :: Maybe String
, remoteAnnexTahoe :: Maybe FilePath
, remoteAnnexBupSplitOptions :: [String]
, remoteAnnexDirectory :: Maybe FilePath
@ -391,6 +392,7 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexGnupgDecryptOptions = getoptions "gnupg-decrypt-options"
, remoteAnnexRsyncUrl = notempty $ getmaybe "rsyncurl"
, remoteAnnexBupRepo = getmaybe "buprepo"
, remoteAnnexBorgRepo = getmaybe "borgrepo"
, remoteAnnexTahoe = getmaybe "tahoe"
, remoteAnnexBupSplitOptions = getoptions "bup-split-options"
, remoteAnnexDirectory = notempty $ getmaybe "directory"

View file

@ -59,6 +59,12 @@ data ImportableContents info = ImportableContents
-- ^ Used by remotes that support importing historical versions of
-- files that are stored in them. This is equivalent to a git
-- commit history.
--
-- When retrieving a historical version of a file,
-- old ImportLocations from importableHistory are not used;
-- the content is no longer expected to be present at those
-- locations. So, if a remote does not support Key/Value access,
-- it should not populate the importableHistory.
}
deriving (Show, Generic)
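-- A minimal sketch (hypothetical values) of a listing with one level of
-- history, analogous to a two-commit git history:
--
--   ImportableContents
--     { importableContents = [ (loc1, newinfo1), (loc2, newinfo2) ]
--     , importableHistory =
--         [ ImportableContents
--             { importableContents = [ (loc1, oldinfo1) ]
--             , importableHistory = []
--             }
--         ]
--     }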

View file

@ -63,10 +63,15 @@ data RemoteTypeA a = RemoteType
, configParser :: RemoteConfig -> a RemoteConfigParser
-- initializes or enables a remote
, setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID)
-- check if a remote of this type is able to support export of trees
-- check if a remote of this type is able to support export
, exportSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool
-- check if a remote of this type is able to support import of trees
-- check if a remote of this type is able to support import
, importSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool
-- is a remote of this type not a usual key/value store,
-- or export/import of a tree of files, but instead a collection
-- of files, populated by something outside git-annex, some of
-- which may be annex objects?
, thirdPartyPopulated :: Bool
}
instance Eq (RemoteTypeA a) where
@ -113,9 +118,9 @@ data RemoteA a = Remote
-- Some remotes can checkPresent without an expensive network
-- operation.
, checkPresentCheap :: Bool
-- Some remotes support export of trees of files.
-- Some remotes support export.
, exportActions :: ExportActions a
-- Some remotes support import of trees of files.
-- Some remotes support import.
, importActions :: ImportActions a
-- Some remotes can provide additional details for whereis.
, whereisKey :: Maybe (Key -> a [String])
@ -276,6 +281,9 @@ data ImportActions a = ImportActions
--
-- May also find old versions of files that are still stored in the
-- remote.
--
-- Throws exception on failure to access the remote.
-- May return Nothing when the remote is unchanged since last time.
{ listImportableContents :: a (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
-- Generates a Key (of any type) for the file stored on the
-- remote at the ImportLocation. Does not download the file
@ -288,8 +296,13 @@ data ImportActions a = ImportActions
-- bearing in mind that the file on the remote may have changed
-- since the ContentIdentifier was generated.
--
-- Throws exception on failure.
, importKey :: Maybe (ImportLocation -> ContentIdentifier -> MeterUpdate -> a Key)
-- When the remote is thirdPartyPopulated, this should check if the
-- file stored on the remote is the content of an annex object,
-- and return its Key, or Nothing if it is not. Should not
-- otherwise return Nothing.
--
-- Throws exception on failure to access the remote.
, importKey :: Maybe (ImportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> a (Maybe Key))
-- Retrieves a file from the remote. Ensures that the file
-- it retrieves has the requested ContentIdentifier.
--

View file

@ -167,7 +167,9 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`.
this can be used to list those versions. It opens a new
block of responses. This can be repeated any number of times
(indicating a branching history), and histories can also
be nested multiple levels deep.
be nested multiple levels deep.
This should only be used when the remote supports using
"TRANSFER RECEIVE Key" to retrieve historical versions of files.
* `END`
Indicates the end of a block of responses.
* `LOCATION Name`

View file

@ -0,0 +1,17 @@
Finally gotten started on the borg special remote idea. A prerequisite of
that is remotes that can be imported from, but not exported to. So I
actually started by allowing setting importtree=yes without
exporttree=yes. A lot of code had assumptions about that not being allowed,
so it took a while to chase down everything. Finished most of that yesterday.
What I've done today is added a `thirdPartyPopulated` type of remote,
which `git-annex sync` can "pull" from by using the existing import
interface to list files on it, and determine which of them are annex object
files. I have not started on the actual borg remote at all, but this should
be all the groundwork for it done.
(I also finished up annex.stalldetection earlier this week.)
---
This work was sponsored by Jake Vosloo [on Patreon](https://patreon.com/joeyh).

View file

@ -1541,6 +1541,12 @@ Remotes are configured using these settings in `.git/config`.
the location of the bup repository to use. Normally this is automatically
set up by `git annex initremote`, but you can change it if needed.
* `remote.<name>.annex-borgrepo`
Used by borg special remotes, this configures
the location of the borg repository to use. Normally this is automatically
set up by `git annex initremote`, but you can change it if needed.
* `remote.<name>.annex-ddarrepo`
Used by ddar special remotes, this configures

View file

@ -25,6 +25,7 @@ the git history is not stored in them.
* [[webdav]]
* [[git]]
* [[httpalso]]
* [[borg]]
* [[xmpp]]
The above special remotes are built into git-annex, and can be used

View file

@ -0,0 +1,32 @@
This special remote type accesses annexed files stored in a
[borg](https://www.borgbackup.org/) repository.
Unlike most special remotes, git-annex cannot be used to store annexed
files in this special remote. You store files by using borg as usual, to
back up the git-annex repository. Then `git-annex sync` will learn about
the annexed files that are stored in the borg repository.
## configuration
These parameters can be passed to `git annex initremote` to configure the
remote:
* `borgrepo` - The location of a borg repository, eg a path, or
`user@host:path` for ssh access.
* `scan` - The path, within the borg repository, to scan for
annex object files. This can be the path to a git-annex repository,
or perhaps a non-encrypted special remote, or a path that contains
several repositories.
Information about all annex objects in the path will be
added to the git-annex branch when syncing with the borg repository.
So, it's best to avoid a path that contains object files for unrelated
git-annex repositories.
## setup example
# borg init --encryption=keyfile /path/to/borgrepo
# git annex initremote borg type=borg borgrepo=/path/to/borgrepo scan=`pwd`
# borg create /path/to/borgrepo `pwd`::{now}
# git annex sync borg
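Once synced, annexed content stored in the borg repository can be retrieved
like content on any other remote (assuming the archive containing it still
exists), for example:
# git annex get somefile --from borg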

View file

@ -0,0 +1,55 @@
importtree=yes remotes are untrusted, because something is modifying that
remote other than git-annex, and it could change a file at any time, so
git-annex can't rely on the file being there. However, it's possible the user
has a policy of not letting files on the remote be modified. It may even be
that some remotes use storage that avoids such problems. So, there should be
some way to override the default trust level for such remotes.
Currently:
joey@darkstar:/tmp/y8>git annex semitrust borg
semitrust borg
This remote's trust level is overridden to untrusted.
The borg special remote is one example of a remote where it's easy for the user
to decide they're not going to delete old archives from it, and so to want
git-annex to trust it.
Below is some docs I wrote for the borg special remote page, should be
moved there when this gets fixed. --[[Joey]]
## trust levels, borg delete and borg prune
git-annex will by default treat the borg special remote as untrusted, so
will not trust it to continue to contain a [[copy|copies]] of any annexed
file. This is necessary because you could run `borg delete` or `borg prune`
and remove the copy from the borg repository. If you choose to set the
trust level of the borg repository to a higher level, you need to avoid
using such commands with that borg repository.
Consider this example:
git-annex add annexedfile
borg create /path/to/borgrepo `pwd`::foo
git-annex sync borg
git-annex semitrust borg
git-annex drop annexedfile
Now the only copy of annexedfile is in the borg repository.
borg create /path/to/borgrepo `pwd`::bar
borg delete /path/to/borgrepo::foo
git-annex sync borg
git-annex whereis annexedfile
Now no copies of annexedfile remain, because the "foo" archive
in the borg repository was the only one to contain it, and it was deleted.
So either keep the borg special remote as untrusted, and use such borg
commands to delete old archives as needed, or avoid using `borg delete`
and `borg prune`, and then the remote can safely be made semitrusted or
trusted.
Also, if you do choose to delete old archives, make sure to never reuse
that archive name for a new archive. git-annex may think it's the same
archive it saw before, and not notice the change.

View file

@ -0,0 +1,5 @@
The tree generated by git-annex sync with a borg remote
does not seem to get grafted into the git-annex branch, so
would be subject to being lost to GC.
Is this a general problem affecting importtree too?

View file

@ -0,0 +1,2 @@
Subject says it all really, sync does not try to get content
from remotes that are thirdPartyPopulated yet.

View file

@ -940,6 +940,7 @@ Executable git-annex
Remote
Remote.Adb
Remote.BitTorrent
Remote.Borg
Remote.Bup
Remote.Ddar
Remote.Directory
@ -962,6 +963,7 @@ Executable git-annex
Remote.Helper.Messages
Remote.Helper.P2P
Remote.Helper.ReadOnly
Remote.Helper.ThirdPartyPopulated
Remote.Helper.Special
Remote.Helper.Ssh
Remote.HttpAlso