diff --git a/Annex/Drop.hs b/Annex/Drop.hs index 1a6af18586..fca96c1530 100644 --- a/Annex/Drop.hs +++ b/Annex/Drop.hs @@ -11,7 +11,7 @@ import Annex.Common import qualified Annex import Logs.Trust import Annex.NumCopies -import Types.Remote (uuid, appendonly, config) +import Types.Remote (uuid, appendonly, config, remotetype, thirdPartyPopulated) import qualified Remote import qualified Command.Drop import Command @@ -88,6 +88,7 @@ handleDropsFrom locs rs reason fromhere key afile si preverified runner = do | appendonly r = go fs rest n | exportTree (config r) = go fs rest n | importTree (config r) = go fs rest n + | thirdPartyPopulated (remotetype r) = go fs rest n | checkcopies n (Just $ Remote.uuid r) = dropr fs r n >>= go fs rest | otherwise = pure n diff --git a/Annex/Import.hs b/Annex/Import.hs index 7c59f9a5ec..201d9e5f7e 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -12,6 +12,7 @@ module Annex.Import ( ImportCommitConfig(..), buildImportCommit, buildImportTrees, + recordImportTree, canImportKeys, importKeys, makeImportMatcher, @@ -104,6 +105,28 @@ buildImportCommit remote importtreeconfig importcommitconfig importable = Just trackingcommit -> inRepo (Git.Ref.tree trackingcommit) >>= \case Nothing -> go Nothing Just _ -> go (Just trackingcommit) + where + go trackingcommit = do + (imported, updatestate) <- recordImportTree remote importtreeconfig importable + buildImportCommit' remote importcommitconfig trackingcommit imported >>= \case + Just finalcommit -> do + updatestate + return (Just finalcommit) + Nothing -> return Nothing + +{- Builds a tree for an import from a special remote. + - + - Also returns an action that can be used to update + - all the other state to record the import. + -} +recordImportTree + :: Remote + -> ImportTreeConfig + -> ImportableContents (Either Sha Key) + -> Annex (History Sha, Annex ()) +recordImportTree remote importtreeconfig importable = do + imported@(History finaltree _) <- buildImportTrees basetree subdir importable + return (imported, updatestate finaltree) where basetree = case importtreeconfig of ImportTree -> emptyTree @@ -112,21 +135,12 @@ buildImportCommit remote importtreeconfig importcommitconfig importable = ImportTree -> Nothing ImportSubTree dir _ -> Just dir - go trackingcommit = do - imported@(History finaltree _) <- - buildImportTrees basetree subdir importable - buildImportCommit' remote importcommitconfig trackingcommit imported >>= \case - Just finalcommit -> do - updatestate finaltree - return (Just finalcommit) - Nothing -> return Nothing - - updatestate committedtree = do + updatestate finaltree = do importedtree <- case subdir of - Nothing -> pure committedtree + Nothing -> pure finaltree Just dir -> let subtreeref = Ref $ - fromRef' committedtree + fromRef' finaltree <> ":" <> getTopFilePath dir in fromMaybe emptyTree @@ -308,9 +322,10 @@ importKeys :: Remote -> ImportTreeConfig -> Bool + -> Bool -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (Maybe (ImportableContents (Either Sha Key))) -importKeys remote importtreeconfig importcontent importablecontents = do +importKeys remote importtreeconfig importcontent thirdpartypopulated importablecontents = do unless (canImportKeys remote importcontent) $ giveup "This remote does not support importing without downloading content." -- This map is used to remember content identifiers that @@ -332,7 +347,9 @@ importKeys remote importtreeconfig importcontent importablecontents = do go oldversion cidmap importing (ImportableContents l h) db = do largematcher <- largeFilesMatcher jobs <- forM l $ \i -> - startimport cidmap importing db i oldversion largematcher + if thirdpartypopulated + then thirdpartypopulatedimport cidmap db i + else startimport cidmap importing db i oldversion largematcher l' <- liftIO $ forM jobs $ either pure (atomically . takeTMVar) if any isNothing l' @@ -391,6 +408,20 @@ importKeys remote importtreeconfig importcontent importablecontents = do importaction return (Right job) + thirdpartypopulatedimport cidmap db (loc, (cid, sz)) = + case Remote.importKey ia of + Nothing -> return $ Left Nothing + Just importkey -> + tryNonAsync (importkey loc cid sz nullMeterUpdate) >>= \case + Right (Just k) -> do + recordcidkey cidmap db cid k + logChange k (Remote.uuid remote) InfoPresent + return $ Left $ Just (loc, Right k) + Right Nothing -> return $ Left Nothing + Left e -> do + warning (show e) + return $ Left Nothing + importordownload cidmap db (loc, (cid, sz)) largematcher= do f <- locworktreefile loc matcher <- largematcher f @@ -433,25 +464,22 @@ importKeys remote importtreeconfig importcontent importablecontents = do return Nothing where importer = do - unsizedk <- importkey loc cid - -- Don't display progress when generating - -- key, if the content will later be - -- downloaded, which is a more expensive - -- operation generally. - (if importcontent then nullMeterUpdate else p) - -- This avoids every remote needing - -- to add the size. - let k = alterKey unsizedk $ \kd -> kd - { keySize = keySize kd <|> Just sz } - checkSecureHashes k >>= \case - Nothing -> do - recordcidkey cidmap db cid k - logChange k (Remote.uuid remote) InfoPresent - if importcontent - then getcontent k - else return (Just (k, True)) - Just msg -> giveup (msg ++ " to import") - + -- Don't display progress when generating + -- key, if the content will later be + -- downloaded, which is a more expensive + -- operation generally. + let p' = if importcontent then nullMeterUpdate else p + importkey loc cid sz p' >>= \case + Nothing -> return Nothing + Just k -> checkSecureHashes k >>= \case + Nothing -> do + recordcidkey cidmap db cid k + logChange k (Remote.uuid remote) InfoPresent + if importcontent + then getcontent k + else return (Just (k, True)) + Just msg -> giveup (msg ++ " to import") + getcontent :: Key -> Annex (Maybe (Key, Bool)) getcontent k = do let af = AssociatedFile (Just f) @@ -630,14 +658,17 @@ makeImportMatcher r = load preferredContentKeylessTokens >>= \case - regardless. (Similar to how git add behaves on gitignored files.) - This avoids creating a remote tracking branch that, when merged, - would delete the files. + - + - Throws exception if unable to contact the remote. + - Returns Nothing when there is no change since last time. -} getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) -getImportableContents r importtreeconfig ci matcher = +getImportableContents r importtreeconfig ci matcher = do Remote.listImportableContents (Remote.importActions r) >>= \case - Nothing -> return Nothing Just importable -> do dbhandle <- Export.openDb (Remote.uuid r) Just <$> filterunwanted dbhandle importable + Nothing -> return Nothing where filterunwanted dbhandle ic = ImportableContents <$> filterM (wanted dbhandle) (importableContents ic) diff --git a/Assistant/DaemonStatus.hs b/Assistant/DaemonStatus.hs index d7c5819951..34e34073ba 100644 --- a/Assistant/DaemonStatus.hs +++ b/Assistant/DaemonStatus.hs @@ -57,7 +57,8 @@ calcSyncRemotes = do contentremotes <- filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . Remote.gitconfig) $ filter (\r -> Remote.uuid r /= NoUUID) syncable let (exportremotes, nonexportremotes) = partition (exportTree . Remote.config) contentremotes - let dataremotes = filter (not . importTree . Remote.config) nonexportremotes + let isimport r = importTree (Remote.config r) || Remote.thirdPartyPopulated (Remote.remotetype r) + let dataremotes = filter (not . isimport) nonexportremotes return $ \dstatus -> dstatus { syncRemotes = syncable diff --git a/Command/Import.hs b/Command/Import.hs index fd788438f4..e1560ce93a 100644 --- a/Command/Import.hs +++ b/Command/Import.hs @@ -306,7 +306,7 @@ seekRemote remote branch msubdir importcontent ci = do void $ includeCommandAction (listContents remote importtreeconfig ci importabletvar) liftIO (atomically (readTVar importabletvar)) >>= \case Nothing -> return () - Just importable -> importKeys remote importtreeconfig importcontent importable >>= \case + Just importable -> importKeys remote importtreeconfig importcontent False importable >>= \case Nothing -> warning $ concat [ "Failed to import some files from " , Remote.name remote @@ -324,21 +324,25 @@ seekRemote remote branch msubdir importcontent ci = do listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize))) -> CommandStart listContents remote importtreeconfig ci tvar = starting "list" ai si $ + listContents' remote importtreeconfig ci $ \importable -> do + liftIO $ atomically $ writeTVar tvar importable + next $ return True + where + ai = ActionItemOther (Just (Remote.name remote)) + si = SeekInput [] + +listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a +listContents' remote importtreeconfig ci a = makeImportMatcher remote >>= \case - Right matcher -> getImportableContents remote importtreeconfig ci matcher >>= \case - Just importable -> next $ do - liftIO $ atomically $ writeTVar tvar (Just importable) - return True - Nothing -> giveup $ "Unable to list contents of " ++ Remote.name remote + Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case + Right importable -> a importable + Left e -> giveup $ "Unable to list contents of " ++ Remote.name remote ++ ": " ++ show e Left err -> giveup $ unwords [ "Cannot import from" , Remote.name remote , "because of a problem with its configuration:" , err ] - where - ai = ActionItemOther (Just (Remote.name remote)) - si = SeekInput [] commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents (Either Sha Key) -> CommandStart commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig importable = diff --git a/Command/Sync.hs b/Command/Sync.hs index b9387df421..0bfe4241fb 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -67,7 +67,7 @@ import Annex.UpdateInstead import Annex.Export import Annex.TaggedPush import Annex.CurrentBranch -import Annex.Import (canImportKeys) +import Annex.Import import Annex.CheckIgnore import Types.FileMatcher import qualified Database.Export as Export @@ -211,8 +211,9 @@ seek' o = do dataremotes <- filter (\r -> Remote.uuid r /= NoUUID) <$> filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . Remote.gitconfig) remotes let (exportremotes, nonexportremotes) = partition (exportTree . Remote.config) dataremotes - let importremotes = filter (importTree . Remote.config) dataremotes - let keyvalueremotes = filter (not . importTree . Remote.config) nonexportremotes + let isimport r = importTree (Remote.config r) || Remote.thirdPartyPopulated (Remote.remotetype r) + let importremotes = filter isimport dataremotes + let keyvalueremotes = filter (not . isimport) nonexportremotes if cleanupOption o then do @@ -464,6 +465,9 @@ pullRemote o mergeconfig remote branch = stopUnless (pure $ pullOption o && want importRemote :: Bool -> SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> CommandSeek importRemote importcontent o mergeconfig remote currbranch | not (pullOption o) || not wantpull = noop + | Remote.thirdPartyPopulated (Remote.remotetype remote) = + when (canImportKeys remote importcontent) $ + importThirdPartyPopulated remote | otherwise = case remoteAnnexTrackingBranch (Remote.gitconfig remote) of Nothing -> noop Just tb -> do @@ -480,6 +484,29 @@ importRemote importcontent o mergeconfig remote currbranch where wantpull = remoteAnnexPull (Remote.gitconfig remote) +{- Import from a remote that is populated by a third party, by listing + - the contents of the remote, and then adding only the files on it that + - importKey identifies to a tree. The tree is only used to keep track + - of where keys are located on the remote, no remote tracking branch is + - updated, because the filenames are the names of annex object files, + - not suitable for a tracking branch. Does not transfer any content. -} +importThirdPartyPopulated :: Remote -> CommandSeek +importThirdPartyPopulated remote = + void $ includeCommandAction $ starting "list" ai si $ + Command.Import.listContents' remote ImportTree (CheckGitIgnore False) go + where + go (Just importable) = importKeys remote ImportTree False True importable >>= \case + Just importablekeys -> do + (_imported, updatestate) <- recordImportTree remote ImportTree importablekeys + next $ do + updatestate + return True + Nothing -> next $ return False + go Nothing = next $ return True -- unchanged from before + + ai = ActionItemOther (Just (Remote.name remote)) + si = SeekInput [] + {- The remote probably has both a master and a synced/master branch. - Which to merge from? Well, the master has whatever latest changes - were committed (or pushed changes, if this is a bare remote), diff --git a/Git/LsTree.hs b/Git/LsTree.hs index ead501f0dc..cd0d406edf 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -1,17 +1,17 @@ {- git ls-tree interface - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} -{-# LANGUAGE BangPatterns #-} - module Git.LsTree ( TreeItem(..), LsTreeMode(..), lsTree, lsTree', + lsTreeStrict, + lsTreeStrict', lsTreeParams, lsTreeFiles, parseLsTree, @@ -30,6 +30,7 @@ import Data.Either import System.Posix.Types import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L +import qualified Data.Attoparsec.ByteString as AS import qualified Data.Attoparsec.ByteString.Lazy as A import qualified Data.Attoparsec.ByteString.Char8 as A8 @@ -38,7 +39,7 @@ data TreeItem = TreeItem , typeobj :: S.ByteString , sha :: Ref , file :: TopFilePath - } deriving Show + } deriving (Show) data LsTreeMode = LsTreeRecursive | LsTreeNonRecursive @@ -51,6 +52,13 @@ lsTree' ps lsmode t repo = do (l, cleanup) <- pipeNullSplit (lsTreeParams lsmode t ps) repo return (rights (map parseLsTree l), cleanup) +lsTreeStrict :: LsTreeMode -> Ref -> Repo -> IO [TreeItem] +lsTreeStrict = lsTreeStrict' [] + +lsTreeStrict' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO [TreeItem] +lsTreeStrict' ps lsmode t repo = rights . map parseLsTreeStrict + <$> pipeNullSplitStrict (lsTreeParams lsmode t ps) repo + lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam] lsTreeParams lsmode r ps = [ Param "ls-tree" @@ -83,6 +91,13 @@ parseLsTree b = case A.parse parserLsTree b of A.Done _ r -> Right r A.Fail _ _ err -> Left err +parseLsTreeStrict :: S.ByteString -> Either String TreeItem +parseLsTreeStrict b = go (AS.parse parserLsTree b) + where + go (AS.Done _ r) = Right r + go (AS.Fail _ _ err) = Left err + go (AS.Partial c) = go (c mempty) + {- Parses a line of ls-tree output, in format: - mode SP type SP sha TAB file - diff --git a/Git/Types.hs b/Git/Types.hs index 77a52d1e45..73c4fe62de 100644 --- a/Git/Types.hs +++ b/Git/Types.hs @@ -135,7 +135,12 @@ fmtObjectType CommitObject = "commit" fmtObjectType TreeObject = "tree" {- Types of items in a tree. -} -data TreeItemType = TreeFile | TreeExecutable | TreeSymlink | TreeSubmodule +data TreeItemType + = TreeFile + | TreeExecutable + | TreeSymlink + | TreeSubmodule + | TreeSubtree deriving (Eq, Show) {- Git uses magic numbers to denote the type of a tree item. -} @@ -144,6 +149,7 @@ readTreeItemType "100644" = Just TreeFile readTreeItemType "100755" = Just TreeExecutable readTreeItemType "120000" = Just TreeSymlink readTreeItemType "160000" = Just TreeSubmodule +readTreeItemType "040000" = Just TreeSubtree readTreeItemType _ = Nothing fmtTreeItemType :: TreeItemType -> S.ByteString @@ -151,12 +157,14 @@ fmtTreeItemType TreeFile = "100644" fmtTreeItemType TreeExecutable = "100755" fmtTreeItemType TreeSymlink = "120000" fmtTreeItemType TreeSubmodule = "160000" +fmtTreeItemType TreeSubtree = "040000" toTreeItemType :: FileMode -> Maybe TreeItemType toTreeItemType 0o100644 = Just TreeFile toTreeItemType 0o100755 = Just TreeExecutable toTreeItemType 0o120000 = Just TreeSymlink toTreeItemType 0o160000 = Just TreeSubmodule +toTreeItemType 0o040000 = Just TreeSubtree toTreeItemType _ = Nothing fromTreeItemType :: TreeItemType -> FileMode @@ -164,6 +172,7 @@ fromTreeItemType TreeFile = 0o100644 fromTreeItemType TreeExecutable = 0o100755 fromTreeItemType TreeSymlink = 0o120000 fromTreeItemType TreeSubmodule = 0o160000 +fromTreeItemType TreeSubtree = 0o040000 data Commit = Commit { commitTree :: Sha diff --git a/Logs/ContentIdentifier.hs b/Logs/ContentIdentifier.hs index 38f904ae2b..16c3969155 100644 --- a/Logs/ContentIdentifier.hs +++ b/Logs/ContentIdentifier.hs @@ -32,12 +32,16 @@ recordContentIdentifier :: RemoteStateHandle -> ContentIdentifier -> Key -> Anne recordContentIdentifier (RemoteStateHandle u) cid k = do c <- liftIO currentVectorClock config <- Annex.getGitConfig - Annex.Branch.change (remoteContentIdentifierLogFile config k) $ - buildLog . addcid c . parseLog + Annex.Branch.maybeChange (remoteContentIdentifierLogFile config k) $ + addcid c . parseLog where - addcid c l = changeMapLog c u (cid :| contentIdentifierList (M.lookup u m)) l + addcid c v + | cid `elem` l = Nothing -- no change needed + | otherwise = Just $ buildLog $ + changeMapLog c u (cid :| l) v where - m = simpleMap l + m = simpleMap v + l = contentIdentifierList (M.lookup u m) -- | Get all known content identifiers for a key. getContentIdentifiers :: Key -> Annex [(RemoteStateHandle, [ContentIdentifier])] diff --git a/Logs/Export.hs b/Logs/Export.hs index 1c198b7992..50b2ea1378 100644 --- a/Logs/Export.hs +++ b/Logs/Export.hs @@ -1,4 +1,4 @@ -{- git-annex export log +{- git-annex export log (also used to log imports) - - Copyright 2017-2019 Joey Hess - @@ -64,7 +64,6 @@ exportedTreeishes = nub . map exportedTreeish incompleteExportedTreeishes :: [Exported] -> [Git.Ref] incompleteExportedTreeishes = concatMap incompleteExportedTreeish - data ExportParticipants = ExportParticipants { exportFrom :: UUID , exportTo :: UUID diff --git a/Remote/Adb.hs b/Remote/Adb.hs index de5b62d030..f67df51754 100644 --- a/Remote/Adb.hs +++ b/Remote/Adb.hs @@ -46,6 +46,7 @@ remote = specialRemoteType $ RemoteType , setup = adbSetup , exportSupported = exportIsSupported , importSupported = importIsSupported + , thirdPartyPopulated = False } androiddirectoryField :: RemoteConfigField @@ -286,8 +287,11 @@ renameExportM serial adir _k old new = do ] listImportableContentsM :: AndroidSerial -> AndroidPath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) -listImportableContentsM serial adir = - process <$> adbShell serial +listImportableContentsM serial adir = adbfind >>= \case + Just ls -> return $ Just $ ImportableContents (mapMaybe mk ls) [] + Nothing -> giveup "adb find failed" + where + adbfind = adbShell serial [ Param "find" -- trailing slash is needed, or android's find command -- won't recurse into the directory @@ -297,9 +301,6 @@ listImportableContentsM serial adir = , Param "-c", Param statformat , Param "{}", Param "+" ] - where - process Nothing = Nothing - process (Just ls) = Just $ ImportableContents (mapMaybe mk ls) [] statformat = adbStatFormat ++ "\t%n" diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs index a60b58506c..61e660270a 100644 --- a/Remote/BitTorrent.hs +++ b/Remote/BitTorrent.hs @@ -49,6 +49,7 @@ remote = RemoteType , setup = error "not supported" , exportSupported = exportUnsupported , importSupported = importUnsupported + , thirdPartyPopulated = False } -- There is only one bittorrent remote, and it always exists. diff --git a/Remote/Borg.hs b/Remote/Borg.hs new file mode 100644 index 0000000000..70a01f8ec1 --- /dev/null +++ b/Remote/Borg.hs @@ -0,0 +1,318 @@ +{- Using borg as a remote. + - + - Copyright 2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Remote.Borg (remote) where + +import Annex.Common +import Types.Remote +import Types.Creds +import Types.Import +import qualified Git +import qualified Git.LsTree as LsTree +import Git.Types (toTreeItemType, TreeItemType(..)) +import Git.FilePath +import Config +import Config.Cost +import Annex.Tmp +import Annex.SpecialRemote.Config +import Remote.Helper.Special +import Remote.Helper.ExportImport +import Annex.UUID +import Types.ProposedAccepted +import Utility.Metered +import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated +import Logs.Export + +import Data.Either +import Text.Read +import Control.Exception (evaluate) +import Control.DeepSeq +import qualified Data.Map as M +import qualified Data.ByteString as S +import qualified Data.ByteString.Lazy as L +import qualified System.FilePath.ByteString as P + +type BorgRepo = String + +type BorgArchiveName = S.ByteString + +remote :: RemoteType +remote = RemoteType + { typename = "borg" + , enumerate = const (findSpecialRemotes "borgrepo") + , generate = gen + , configParser = mkRemoteConfigParser + [ optionalStringParser borgrepoField + (FieldDesc "(required) borg repository to use") + ] + , setup = borgSetup + , exportSupported = exportUnsupported + , importSupported = importIsSupported + , thirdPartyPopulated = True + } + +borgrepoField :: RemoteConfigField +borgrepoField = Accepted "borgrepo" + +gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +gen r u rc gc rs = do + c <- parsedRemoteConfig remote rc + cst <- remoteCost gc $ + if borgLocal borgrepo + then nearlyCheapRemoteCost + else expensiveRemoteCost + return $ Just $ Remote + { uuid = u + , cost = cst + , name = Git.repoDescribe r + , storeKey = storeKeyDummy + , retrieveKeyFile = retrieveKeyFileDummy + , retrieveKeyFileCheap = Nothing + -- Borg cryptographically verifies content. + , retrievalSecurityPolicy = RetrievalAllKeysSecure + , removeKey = removeKeyDummy + , lockContent = Nothing + , checkPresent = checkPresentDummy + , checkPresentCheap = borgLocal borgrepo + , exportActions = exportUnsupported + , importActions = ImportActions + { listImportableContents = listImportableContentsM u borgrepo + , importKey = Just ThirdPartyPopulated.importKey + , retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM borgrepo + , checkPresentExportWithContentIdentifier = checkPresentExportWithContentIdentifierM borgrepo + -- This remote is thirdPartyPopulated, so these + -- actions will never be used. + , storeExportWithContentIdentifier = storeExportWithContentIdentifier importUnsupported + , removeExportDirectoryWhenEmpty = removeExportDirectoryWhenEmpty importUnsupported + , removeExportWithContentIdentifier = removeExportWithContentIdentifier importUnsupported + } + , whereisKey = Nothing + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , getRepo = return r + , gitconfig = gc + , localpath = if borgLocal borgrepo && not (null borgrepo) + then Just borgrepo + else Nothing + , remotetype = remote + , availability = if borgLocal borgrepo then LocallyAvailable else GloballyAvailable + , readonly = False + , appendonly = False + , mkUnavailable = return Nothing + , getInfo = return [("repo", borgrepo)] + , claimUrl = Nothing + , checkUrl = Nothing + , remoteStateHandle = rs + } + where + borgrepo = fromMaybe (giveup "missing borgrepo") $ remoteAnnexBorgRepo gc + +borgSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) +borgSetup _ mu _ c _gc = do + u <- maybe (liftIO genUUID) return mu + + -- verify configuration is sane + let borgrepo = maybe (giveup "Specify borgrepo=") fromProposedAccepted $ + M.lookup borgrepoField c + + -- The borgrepo is stored in git config, as well as this repo's + -- persistant state, so it can vary between hosts. + gitConfigSpecialRemote u c [("borgrepo", borgrepo)] + + return (c, u) + +borgLocal :: BorgRepo -> Bool +borgLocal = notElem ':' + +borgArchive :: BorgRepo -> BorgArchiveName -> String +borgArchive r n = r ++ "::" ++ decodeBS' n + +listImportableContentsM :: UUID -> BorgRepo -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) +listImportableContentsM u borgrepo = prompt $ do + imported <- getImported u + ls <- withborglist borgrepo "{barchive}{NUL}" $ \as -> + forM as $ \archivename -> + case M.lookup archivename imported of + Just getfast -> return $ Left (archivename, getfast) + Nothing -> Right <$> + let archive = borgArchive borgrepo archivename + in withborglist archive "{size}{NUL}{path}{NUL}" $ + liftIO . evaluate . force . parsefilelist archivename + if all isLeft ls && M.null (M.difference imported (M.fromList (lefts ls))) + then return Nothing -- unchanged since last time, avoid work + else Just . mkimportablecontents <$> mapM (either snd pure) ls + where + withborglist what format a = do + let p = (proc "borg" ["list", what, "--format", format]) + { std_out = CreatePipe } + (Nothing, Just h, Nothing, pid) <- liftIO $ createProcess p + l <- liftIO $ map L.toStrict + . filter (not . L.null) + . L.split 0 + <$> L.hGetContents h + let cleanup = liftIO $ do + hClose h + forceSuccessProcess p pid + a l `finally` cleanup + + parsefilelist archivename (bsz:f:rest) = case readMaybe (fromRawFilePath bsz) of + Nothing -> parsefilelist archivename rest + Just sz -> + let loc = genImportLocation archivename f + -- This does a little unncessary work to parse the + -- key, which is then thrown away. But, it lets the + -- file list be shrank down to only the ones that are + -- importable keys, so avoids needing to buffer all + -- the rest of the files in memory. + in case ThirdPartyPopulated.importKey' loc sz of + Just _k -> (loc, (borgContentIdentifier, sz)) + : parsefilelist archivename rest + Nothing -> parsefilelist archivename rest + parsefilelist _ _ = [] + + -- importableHistory is not used for retrieval, so is not + -- populated with old archives. Instead, a tree of archives + -- is constructed, by genImportLocation including the archive + -- name in the ImportLocation. + mkimportablecontents l = ImportableContents + { importableContents = concat l + , importableHistory = [] + } + +-- We do not need a ContentIdentifier in order to retrieve a file from +-- borg; the ImportLocation contains all that's needed. So, this is left +-- empty. +borgContentIdentifier :: ContentIdentifier +borgContentIdentifier = ContentIdentifier mempty + +-- Borg does not allow / in the name of an archive, so the archive +-- name will always be the first directory in the ImportLocation. +-- +-- Paths in a borg archive are always relative, not absolute, so the use of +-- to combine the archive name with the path will always work. +genImportLocation :: BorgArchiveName -> RawFilePath -> ImportLocation +genImportLocation archivename p = + ThirdPartyPopulated.mkThirdPartyImportLocation $ + archivename P. p + +extractImportLocation :: ImportLocation -> (BorgArchiveName, RawFilePath) +extractImportLocation loc = go $ P.splitDirectories $ + ThirdPartyPopulated.fromThirdPartyImportLocation loc + where + go (archivename:rest) = (archivename, P.joinPath rest) + go _ = giveup $ "Unable to parse import location " ++ fromRawFilePath (fromImportLocation loc) + +-- Since the ImportLocation starts with the archive name, a list of all +-- archive names we've already imported can be found by just listing the +-- last imported tree. And the contents of those archives can be retrieved +-- by listing the subtree recursively, which will likely be quite a lot +-- faster than running borg. +getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(ImportLocation, (ContentIdentifier, ByteSize))])) +getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u) + where + go t = M.fromList . mapMaybe mk + <$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeNonRecursive t) + + mk ti + | toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just + ( getTopFilePath (LsTree.file ti) + , getcontents + (getTopFilePath (LsTree.file ti)) + (LsTree.sha ti) + ) + | otherwise = Nothing + + getcontents archivename t = mapMaybe (mkcontents archivename) + <$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive t) + + mkcontents archivename ti = do + let f = ThirdPartyPopulated.fromThirdPartyImportLocation $ + mkImportLocation $ getTopFilePath $ LsTree.file ti + k <- deserializeKey' (P.takeFileName f) + return + ( genImportLocation archivename f + , + ( borgContentIdentifier + -- defaulting to 0 size is ok, this size + -- only gets used by + -- ThirdPartyPopulated.importKey, + -- which ignores the size when the key + -- does not have a size. + , fromMaybe 0 (fromKey keySize k) + ) + ) + +-- Check if the file is still there in the borg archive. +-- Does not check that the content is unchanged; we assume that +-- the content of files in borg archives does not change, which is normally +-- the case. But archives may be deleted, and files may be deleted. +checkPresentExportWithContentIdentifierM :: BorgRepo -> Key -> ImportLocation -> [ContentIdentifier] -> Annex Bool +checkPresentExportWithContentIdentifierM borgrepo _ loc _ = prompt $ liftIO $ do + let p = proc "borg" + [ "list" + , "--format" + , "1" + , borgArchive borgrepo archivename + , fromRawFilePath archivefile + ] + -- borg list exits nonzero with an error message if an archive + -- no longer exists. But, the user can delete archives at any + -- time they want. So, hide errors, and if it exists nonzero, + -- check if the borg repository still exists, and only throw an + -- exception if not. + (Nothing, Just h, Nothing, pid) <- withNullHandle $ \nullh -> + createProcess $ p + { std_out = CreatePipe + , std_err = UseHandle nullh + } + ok <- (== "1") <$> hGetContentsStrict h + hClose h + ifM (checkSuccessProcess pid) + ( return ok + , checkrepoexists + ) + where + (archivename, archivefile) = extractImportLocation loc + + checkrepoexists = do + let p = proc "borg" + [ "list" + , "--format" + , "1" + , borgrepo + ] + (Nothing, Nothing, Nothing, pid) <- withNullHandle $ \nullh -> + createProcess $ p + { std_out = UseHandle nullh } + ifM (checkSuccessProcess pid) + ( return False -- repo exists, content not in it + , giveup $ "Unable to access borg repository " ++ borgrepo + ) + +retrieveExportWithContentIdentifierM :: BorgRepo -> ImportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key +retrieveExportWithContentIdentifierM borgrepo loc _ dest mkk _ = do + showOutput + prompt $ withOtherTmp $ \othertmp -> liftIO $ do + -- borgrepo could be relative, and borg has to be run + -- in the temp directory to get it to write there + absborgrepo <- fromRawFilePath <$> absPath (toRawFilePath borgrepo) + let p = proc "borg" + [ "extract" + , borgArchive absborgrepo archivename + , fromRawFilePath archivefile + ] + (Nothing, Nothing, Nothing, pid) <- createProcess $ p + { cwd = Just (fromRawFilePath othertmp) } + forceSuccessProcess p pid + -- Filepaths in borg archives are relative, so it's ok to + -- combine with + moveFile (fromRawFilePath othertmp fromRawFilePath archivefile) dest + removeDirectoryRecursive (fromRawFilePath othertmp) + mkk + where + (archivename, archivefile) = extractImportLocation loc diff --git a/Remote/Bup.hs b/Remote/Bup.hs index f87888b688..8950ac4670 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -50,6 +50,7 @@ remote = specialRemoteType $ RemoteType , setup = bupSetup , exportSupported = exportUnsupported , importSupported = importUnsupported + , thirdPartyPopulated = False } buprepoField :: RemoteConfigField diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index 514b978474..4c544d28cd 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -45,6 +45,7 @@ remote = specialRemoteType $ RemoteType , setup = ddarSetup , exportSupported = exportUnsupported , importSupported = importUnsupported + , thirdPartyPopulated = False } ddarrepoField :: RemoteConfigField diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 4be5850bb5..6de71fffe9 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -56,6 +56,7 @@ remote = specialRemoteType $ RemoteType , setup = directorySetup , exportSupported = exportIsSupported , importSupported = importIsSupported + , thirdPartyPopulated = False } directoryField :: RemoteConfigField @@ -337,10 +338,10 @@ removeExportLocation topdir loc = in go (upFrom loc') =<< tryIO (removeDirectory p) listImportableContentsM :: RawFilePath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) -listImportableContentsM dir = catchMaybeIO $ liftIO $ do +listImportableContentsM dir = liftIO $ do l <- dirContentsRecursive (fromRawFilePath dir) l' <- mapM (go . toRawFilePath) l - return $ ImportableContents (catMaybes l') [] + return $ Just $ ImportableContents (catMaybes l') [] where go f = do st <- R.getFileStatus f @@ -369,13 +370,15 @@ guardSameContentIdentifiers cont old new | new == Just old = cont | otherwise = giveup "file content has changed" -importKeyM :: RawFilePath -> ExportLocation -> ContentIdentifier -> MeterUpdate -> Annex Key -importKeyM dir loc cid p = do +importKeyM :: RawFilePath -> ExportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> Annex (Maybe Key) +importKeyM dir loc cid sz p = do backend <- chooseBackend f - k <- fst <$> genKey ks p backend + unsizedk <- fst <$> genKey ks p backend + let k = alterKey unsizedk $ \kd -> kd + { keySize = keySize kd <|> Just sz } currcid <- liftIO $ mkContentIdentifier absf =<< R.getFileStatus absf - guardSameContentIdentifiers (return k) cid currcid + guardSameContentIdentifiers (return (Just k)) cid currcid where f = fromExportLocation loc absf = dir P. f diff --git a/Remote/External.hs b/Remote/External.hs index 255c5e2456..4921bb5027 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -53,6 +53,7 @@ remote = specialRemoteType $ RemoteType , setup = externalSetup , exportSupported = checkExportSupported , importSupported = importUnsupported + , thirdPartyPopulated = False } externaltypeField :: RemoteConfigField diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 73c5b7b50c..af1ddc712f 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -78,6 +78,7 @@ remote = specialRemoteType $ RemoteType , setup = gCryptSetup , exportSupported = exportUnsupported , importSupported = importUnsupported + , thirdPartyPopulated = False } gitRepoField :: RemoteConfigField diff --git a/Remote/Git.hs b/Remote/Git.hs index f9e0711401..62751a5607 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -87,6 +87,7 @@ remote = RemoteType , setup = gitSetup , exportSupported = exportUnsupported , importSupported = importUnsupported + , thirdPartyPopulated = False } locationField :: RemoteConfigField diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index d2c749359f..f9cf7f7512 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -74,6 +74,7 @@ remote = specialRemoteType $ RemoteType , setup = mySetup , exportSupported = exportUnsupported , importSupported = importUnsupported + , thirdPartyPopulated = False } urlField :: RemoteConfigField diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index a5420d9987..d4b2365226 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -48,6 +48,7 @@ remote = specialRemoteType $ RemoteType , setup = glacierSetup , exportSupported = exportUnsupported , importSupported = importUnsupported + , thirdPartyPopulated = False } datacenterField :: RemoteConfigField diff --git a/Remote/Helper/ExportImport.hs b/Remote/Helper/ExportImport.hs index 9ff6f92800..9e2840e2f5 100644 --- a/Remote/Helper/ExportImport.hs +++ b/Remote/Helper/ExportImport.hs @@ -54,7 +54,7 @@ instance HasImportUnsupported (ParsedRemoteConfig -> RemoteGitConfig -> Annex Bo instance HasImportUnsupported (ImportActions Annex) where importUnsupported = ImportActions - { listImportableContents = return Nothing + { listImportableContents = nope , importKey = Nothing , retrieveExportWithContentIdentifier = nope , storeExportWithContentIdentifier = nope @@ -72,7 +72,7 @@ importIsSupported :: ParsedRemoteConfig -> RemoteGitConfig -> Annex Bool importIsSupported = \_ _ -> return True -- | Prevent or allow exporttree=yes and importtree=yes when --- setting up a new remote, depending on exportSupported and importSupported. +-- setting up a new remote, depending on the remote's capabilities. adjustExportImportRemoteType :: RemoteType -> RemoteType adjustExportImportRemoteType rt = rt { setup = setup' } where @@ -80,7 +80,7 @@ adjustExportImportRemoteType rt = rt { setup = setup' } pc <- either giveup return . parseRemoteConfig c =<< configParser rt c let checkconfig supported configured configfield cont = - ifM (supported rt pc gc) + ifM (supported rt pc gc <&&> pure (not (thirdPartyPopulated rt))) ( case st of Init | configured pc && encryptionIsEnabled pc -> @@ -102,8 +102,13 @@ adjustExportImportRemoteType rt = rt { setup = setup' } -- | Adjust a remote to support exporttree=yes and/or importree=yes. adjustExportImport :: Remote -> RemoteStateHandle -> Annex Remote adjustExportImport r rs = do - isexport <- pure (exportTree (config r)) <&&> isExportSupported r - isimport <- pure (importTree (config r)) <&&> isImportSupported r + isexport <- pure (exportTree (config r)) + <&&> isExportSupported r + -- When thirdPartyPopulated is True, the remote + -- does not need to be configured with importTree to support + -- imports. + isimport <- pure (importTree (config r) || thirdPartyPopulated (remotetype r)) + <&&> isImportSupported r let r' = r { remotetype = (remotetype r) { exportSupported = if isexport @@ -139,11 +144,13 @@ adjustExportImport' isexport isimport r rs = do -- when another repository has already stored the -- key, and the local repository does not know -- about it. To avoid unnecessary costs, don't do it. - if isexport - then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it" - else if isimport - then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it" - else storeKey r k af p + if mergeable + then if isexport + then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it" + else if isimport + then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it" + else storeKey r k af p + else storeKey r k af p , removeKey = \k -> -- Removing a key from an export would need to -- change the tree in the export log to not include @@ -151,12 +158,14 @@ adjustExportImport' isexport isimport r rs = do -- files would not be dealt with correctly. -- There does not seem to be a good use case for -- removing a key from an export in any case. - if isexport - then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove" - else if isimport - then giveup "dropping content from this remote is not supported because it is configured with importtree=yes" - else removeKey r k - , lockContent = if iskeyvaluestore + if mergeable + then if isexport + then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove" + else if isimport + then giveup "dropping content from this remote is not supported because it is configured with importtree=yes" + else removeKey r k + else removeKey r k + , lockContent = if iskeyvaluestore || not mergeable then lockContent r else Nothing , retrieveKeyFile = \k af dest p -> @@ -180,8 +189,11 @@ adjustExportImport' isexport isimport r rs = do -- was exported to are present. This -- doesn't guarantee the export -- contains the right content, - -- which is why export remotes - -- are untrusted. + -- if the remote is an export, + -- or if something else can write + -- to it. Remotes that have such + -- problems are made untrusted, + -- so it's not worried about here. then anyM (checkPresentExport (exportActions r) k) =<< getexportlocs dbv k else checkPresent r k @@ -201,17 +213,23 @@ adjustExportImport' isexport isimport r rs = do else return Nothing , getInfo = do is <- getInfo r - is' <- if isexport + is' <- if isexport && not mergeable then do ts <- map fromRef . exportedTreeishes <$> getExport (uuid r) - return (is++[("export", "yes"), ("exportedtree", unwords ts)]) + return (is++[("exporttree", "yes"), ("exportedtree", unwords ts)]) else return is - return $ if isimport - then (is'++[("import", "yes")]) + return $ if isimport && not mergeable + then (is'++[("importtree", "yes")]) else is' } where + -- When a remote is populated by a third party, a tree can be + -- imported from it, but that tree is not mergeable into the + -- user's own git branch. But annex objects found in the tree + -- (identified by importKey) can still be retrieved from the remote. + mergeable = thirdPartyPopulated (remotetype r) + -- exportActions adjusted to use the equivilant import actions, -- which take ContentIdentifiers into account. exportActionsForImport dbv ciddbv ea = ea diff --git a/Remote/Helper/ThirdPartyPopulated.hs b/Remote/Helper/ThirdPartyPopulated.hs new file mode 100644 index 0000000000..c10a5c25cb --- /dev/null +++ b/Remote/Helper/ThirdPartyPopulated.hs @@ -0,0 +1,86 @@ +{- Helpers for thirdPartyPopulated remotes + - + - Copyright 2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Remote.Helper.ThirdPartyPopulated where + +import Annex.Common +import Types.Remote +import Types.Import +import Crypto (isEncKey) +import Utility.Metered + +import qualified System.FilePath.ByteString as P +import qualified Data.ByteString as S + +-- When a remote is thirdPartyPopulated, the files we want are probably +-- in the .git directory. But, git does not really support .git in paths +-- in a git tree. (Such a tree can be built, but it will lead to problems.) +-- And so anything in .git is prevented from being imported. +-- To work around that, this renames that directory when generating an +-- ImportLocation. +mkThirdPartyImportLocation :: RawFilePath -> ImportLocation +mkThirdPartyImportLocation = + mkImportLocation . P.joinPath . map esc . P.splitDirectories + where + esc ".git" = "dotgit" + esc x + | "dotgit" `S.isSuffixOf` x = "dot" <> x + | otherwise = x + +fromThirdPartyImportLocation :: ImportLocation -> RawFilePath +fromThirdPartyImportLocation = + P.joinPath . map unesc . P.splitDirectories . fromImportLocation + where + unesc "dotgit" = ".git" + unesc x + | "dotgit" `S.isSuffixOf` x = S.drop 3 x + | otherwise = x + +-- When a remote is thirdPartyPopulated, and contains a backup of a +-- git-annex repository or some special remotes, this can be used to +-- find only those ImportLocations that are annex object files. +-- All other ImportLocations are ignored. +importKey :: ImportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> Annex (Maybe Key) +importKey loc _cid sz _ = return $ importKey' loc sz + +importKey' :: ImportLocation -> ByteSize -> Maybe Key +importKey' loc sz = case deserializeKey' f of + Just k + -- Annex objects always are in a subdirectory with the same + -- name as the filename. If this is not the case for the file + -- that was backed up, it is probably not a valid annex object. + -- Eg, it could be something in annex/bad/, or annex/tmp/. + -- Or it could be a file that only happens to have a name + -- like an annex object. + -- (This does unfortunately prevent recognizing files that are + -- part of special remotes that don't use that layout. The most + -- likely special remote to be in a backup, the directory + -- special remote, does use that layout at least.) + | lastMaybe (P.splitDirectories (P.dropFileName p)) /= Just f -> Nothing + -- Chunked or encrypted keys used in special remotes are not + -- supported. + | isChunkKey k || isEncKey k -> Nothing + -- Check that the size of the key is the same as the size of the + -- file stored in the backup. This is a cheap way to make sure it's + -- probabably the actual content of the file. We don't fully + -- verify the content here because that could be a very + -- expensive operation for a large repository; if the user + -- wants to detect every possible data corruption problem + -- (eg, wrong data read off disk during backup, or the object + -- was corrupt in the git-annex repo and that bad object got + -- backed up), they can fsck the remote. + | otherwise -> case fromKey keySize k of + Just sz' + | sz' == sz -> Just k + | otherwise -> Nothing + Nothing -> Just k + Nothing -> Nothing + where + p = fromImportLocation loc + f = P.takeFileName p diff --git a/Remote/Hook.hs b/Remote/Hook.hs index cc0ead39e7..89611113b7 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -40,6 +40,7 @@ remote = specialRemoteType $ RemoteType , setup = hookSetup , exportSupported = exportUnsupported , importSupported = importUnsupported + , thirdPartyPopulated = False } hooktypeField :: RemoteConfigField diff --git a/Remote/HttpAlso.hs b/Remote/HttpAlso.hs index 9411f3a842..7beb52426a 100644 --- a/Remote/HttpAlso.hs +++ b/Remote/HttpAlso.hs @@ -41,6 +41,7 @@ remote = RemoteType , setup = httpAlsoSetup , exportSupported = exportIsSupported , importSupported = importUnsupported + , thirdPartyPopulated = False } urlField :: RemoteConfigField diff --git a/Remote/List.hs b/Remote/List.hs index 7695eec902..8ca9d8f794 100644 --- a/Remote/List.hs +++ b/Remote/List.hs @@ -36,6 +36,7 @@ import qualified Remote.Glacier import qualified Remote.Ddar import qualified Remote.GitLFS import qualified Remote.HttpAlso +import qualified Remote.Borg import qualified Remote.Hook import qualified Remote.External @@ -57,6 +58,7 @@ remoteTypes = map adjustExportImportRemoteType , Remote.Ddar.remote , Remote.GitLFS.remote , Remote.HttpAlso.remote + , Remote.Borg.remote , Remote.Hook.remote , Remote.External.remote ] diff --git a/Remote/P2P.hs b/Remote/P2P.hs index 5016c9f059..859205bfcc 100644 --- a/Remote/P2P.hs +++ b/Remote/P2P.hs @@ -41,6 +41,7 @@ remote = RemoteType , setup = error "P2P remotes are set up using git-annex p2p" , exportSupported = exportUnsupported , importSupported = importUnsupported + , thirdPartyPopulated = False } chainGen :: P2PAddress -> Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 813c0f1bbd..7627fbd2c6 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -60,6 +60,7 @@ remote = specialRemoteType $ RemoteType , setup = rsyncSetup , exportSupported = exportIsSupported , importSupported = importUnsupported + , thirdPartyPopulated = False } shellEscapeField :: RemoteConfigField diff --git a/Remote/S3.hs b/Remote/S3.hs index b35a62186e..90db63bb1d 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -118,6 +118,7 @@ remote = specialRemoteType $ RemoteType , setup = s3Setup , exportSupported = exportIsSupported , importSupported = importIsSupported + , thirdPartyPopulated = False } bucketField :: RemoteConfigField @@ -552,12 +553,11 @@ renameExportS3 hv r rs info k src dest = Just <$> go listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) listImportableContentsS3 hv r info = withS3Handle hv $ \case - Nothing -> do - warning $ needS3Creds (uuid r) - return Nothing - Just h -> catchMaybeIO $ liftIO $ runResourceT $ - extractFromResourceT =<< startlist h + Nothing -> giveup $ needS3Creds (uuid r) + Just h -> Just <$> go h where + go h = liftIO $ runResourceT $ extractFromResourceT =<< startlist h + startlist h | versioning info = do rsp <- sendS3Handle h $ diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs index 72df4c70e8..3fbae0df9b 100644 --- a/Remote/Tahoe.hs +++ b/Remote/Tahoe.hs @@ -67,6 +67,7 @@ remote = specialRemoteType $ RemoteType , setup = tahoeSetup , exportSupported = exportUnsupported , importSupported = importUnsupported + , thirdPartyPopulated = False } scsField :: RemoteConfigField diff --git a/Remote/Web.hs b/Remote/Web.hs index bbe24b38fc..f5de143c27 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -32,6 +32,7 @@ remote = RemoteType , setup = error "not supported" , exportSupported = exportUnsupported , importSupported = importUnsupported + , thirdPartyPopulated = False } -- There is only one web remote, and it always exists. diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index b649ae0444..5163942b47 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -57,6 +57,7 @@ remote = specialRemoteType $ RemoteType , setup = webdavSetup , exportSupported = exportIsSupported , importSupported = importUnsupported + , thirdPartyPopulated = False } urlField :: RemoteConfigField diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index cb50c79666..74df213419 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -327,6 +327,7 @@ data RemoteGitConfig = RemoteGitConfig , remoteAnnexGnupgDecryptOptions :: [String] , remoteAnnexRsyncUrl :: Maybe String , remoteAnnexBupRepo :: Maybe String + , remoteAnnexBorgRepo :: Maybe String , remoteAnnexTahoe :: Maybe FilePath , remoteAnnexBupSplitOptions :: [String] , remoteAnnexDirectory :: Maybe FilePath @@ -391,6 +392,7 @@ extractRemoteGitConfig r remotename = do , remoteAnnexGnupgDecryptOptions = getoptions "gnupg-decrypt-options" , remoteAnnexRsyncUrl = notempty $ getmaybe "rsyncurl" , remoteAnnexBupRepo = getmaybe "buprepo" + , remoteAnnexBorgRepo = getmaybe "borgrepo" , remoteAnnexTahoe = getmaybe "tahoe" , remoteAnnexBupSplitOptions = getoptions "bup-split-options" , remoteAnnexDirectory = notempty $ getmaybe "directory" diff --git a/Types/Import.hs b/Types/Import.hs index 2724ddc7ef..2013e44cde 100644 --- a/Types/Import.hs +++ b/Types/Import.hs @@ -59,6 +59,12 @@ data ImportableContents info = ImportableContents -- ^ Used by remotes that support importing historical versions of -- files that are stored in them. This is equivilant to a git -- commit history. + -- + -- When retrieving a historical version of a file, + -- old ImportLocations from importableHistory are not used; + -- the content is no longer expected to be present at those + -- locations. So, if a remote does not support Key/Value access, + -- it should not populate the importableHistory. } deriving (Show, Generic) diff --git a/Types/Remote.hs b/Types/Remote.hs index 7b5f1ac0e5..cc5fb47a23 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -63,10 +63,15 @@ data RemoteTypeA a = RemoteType , configParser :: RemoteConfig -> a RemoteConfigParser -- initializes or enables a remote , setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID) - -- check if a remote of this type is able to support export of trees + -- check if a remote of this type is able to support export , exportSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool - -- check if a remote of this type is able to support import of trees + -- check if a remote of this type is able to support import , importSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool + -- is a remote of this type not a usual key/value store, + -- or export/import of a tree of files, but instead a collection + -- of files, populated by something outside git-annex, some of + -- which may be annex objects? + , thirdPartyPopulated :: Bool } instance Eq (RemoteTypeA a) where @@ -113,9 +118,9 @@ data RemoteA a = Remote -- Some remotes can checkPresent without an expensive network -- operation. , checkPresentCheap :: Bool - -- Some remotes support export of trees of files. + -- Some remotes support export. , exportActions :: ExportActions a - -- Some remotes support import of trees of files. + -- Some remotes support import. , importActions :: ImportActions a -- Some remotes can provide additional details for whereis. , whereisKey :: Maybe (Key -> a [String]) @@ -276,6 +281,9 @@ data ImportActions a = ImportActions -- -- May also find old versions of files that are still stored in the -- remote. + -- + -- Throws exception on failure to access the remote. + -- May return Nothing when the remote is unchanged since last time. { listImportableContents :: a (Maybe (ImportableContents (ContentIdentifier, ByteSize))) -- Generates a Key (of any type) for the file stored on the -- remote at the ImportLocation. Does not download the file @@ -288,8 +296,13 @@ data ImportActions a = ImportActions -- bearing in mind that the file on the remote may have changed -- since the ContentIdentifier was generated. -- - -- Throws exception on failure. - , importKey :: Maybe (ImportLocation -> ContentIdentifier -> MeterUpdate -> a Key) + -- When the remote is thirdPartyPopulated, this should check if the + -- file stored on the remote is the content of an annex object, + -- and return its Key, or Nothing if it is not. Should not + -- otherwise return Nothing. + -- + -- Throws exception on failure to access the remote. + , importKey :: Maybe (ImportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> a (Maybe Key)) -- Retrieves a file from the remote. Ensures that the file -- it retrieves has the requested ContentIdentifier. -- diff --git a/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn b/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn index d1b255a8d0..9d474dbcbf 100644 --- a/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn +++ b/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn @@ -167,7 +167,9 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`. this can be used to list those versions. It opens a new block of responses. This can be repeated any number of times (indicating a branching history), and histories can also - be nested multiple levels deep. + be nested multiple levels deep. + This should only be used when the remote supports using + "TRANSFER RECEIVE Key" to retrieve historical versions of files. * `END` Indicates the end of a block of responses. * `LOCATION Name` diff --git a/doc/devblog/day_637__thirdparty_of_borg.mdwn b/doc/devblog/day_637__thirdparty_of_borg.mdwn new file mode 100644 index 0000000000..5d4a34c999 --- /dev/null +++ b/doc/devblog/day_637__thirdparty_of_borg.mdwn @@ -0,0 +1,17 @@ +Finally gotten started on the borg special remote idea. A prerequisite of +that is remotes that can be imported from, but not exported to. So I +actually started by allowing setting importtree=yes without +exporttree=yes. A lot of code had assumptions about that not being allowed, +so it took a while to chase down everything. Finished most of that yesterday. + +What I've done today is added a `thirdPartyPopulated` type of remote, +which `git-annex sync` can "pull" from by using the existing import +interface to list files on it, and determine which of them are annex object +files. I have not started on the actual borg remote at all, but this should +be all the groundwork for it done. + +(I also finished up annex.stalldetection earlier this week.) + +--- + +This work was sponsored by Jake Vosloo [on Patreon](https://patreon.com/joeyh). diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 6280bcb864..45306545c7 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1541,6 +1541,12 @@ Remotes are configured using these settings in `.git/config`. the location of the bup repository to use. Normally this is automatically set up by `git annex initremote`, but you can change it if needed. +* `remote..annex-borgrepo` + + Used by borg special remotes, this configures + the location of the borg repository to use. Normally this is automatically + set up by `git annex initremote`, but you can change it if needed. + * `remote..annex-ddarrepo` Used by ddar special remotes, this configures diff --git a/doc/special_remotes.mdwn b/doc/special_remotes.mdwn index 29a1f36347..515699313e 100644 --- a/doc/special_remotes.mdwn +++ b/doc/special_remotes.mdwn @@ -25,6 +25,7 @@ the git history is not stored in them. * [[webdav]] * [[git]] * [[httpalso]] +* [[borg]] * [[xmpp]] The above special remotes are built into git-annex, and can be used diff --git a/doc/special_remotes/borg.mdwn b/doc/special_remotes/borg.mdwn new file mode 100644 index 0000000000..02f8258559 --- /dev/null +++ b/doc/special_remotes/borg.mdwn @@ -0,0 +1,32 @@ +This special remote type accesses annexed files stored in a +[borg](https://www.borgbackup.org/) repository. + +Unlike most special remotes, git-annex cannot be used to store annexed +files in this special remote. You store files by using borg as usual, to +back up the git-annex repository. Then `git-annex sync` will learn about +the annexed files that are stored in the borg repository. + +## configuration + +These parameters can be passed to `git annex initremote` to configure the +remote: + +* `borgrepo` - The location of a borg repository, eg a path, or + `user@host:path` for ssh access. + +* `scan` - The path, within the borg repository, to scan for + annex object files. This can be the path to a git-annex repository, + or perhaps a non-encrypted special remote, or a path that contains + several repositories. + + Information about all annex objects in the path will be + added to the git-annex branch when syncing with the borg repository. + So, it's best to avoid a path that contains object files for unrelated + git-annex repositories. + +## setup example + + # borg init --encryption=keyfile /path/to/borgrepo + # git annex initremote borg type=borg borgrepo=/path/to/borgrepo scan=`pwd` + # borg create /path/to/borgrepo `pwd`::{now} + # git annex sync borg diff --git a/doc/todo/allow_overriding_untrust_of_import_remotes.mdwn b/doc/todo/allow_overriding_untrust_of_import_remotes.mdwn new file mode 100644 index 0000000000..99d6009d52 --- /dev/null +++ b/doc/todo/allow_overriding_untrust_of_import_remotes.mdwn @@ -0,0 +1,55 @@ +importtree=yes remotes are untrusted, because something is modifying that +remote other than git-annex, and it could change a file at any time, so +git-annex can't rely on the file being there. However, it's possible the user +has a policy of not letting files on the remote be modified. It may even be +that some remotes use storage that avoids such problems. So, there should be +some way to override the default trust level for such remotes. + +Currently: + + joey@darkstar:/tmp/y8>git annex semitrust borg + semitrust borg + This remote's trust level is overridden to untrusted. + +The borg special remote is one example of one where it's easy for the user to +decide they're going to not delete old archives from it, and so want git-annex +to trust it. + +Below is some docs I wrote for the borg special remote page, should be +moved there when this gets fixed. --[[Joey]] + +## trust levels, borg delete and borg prune + +git-annex will by default treat the borg special remote as untrusted, so +will not trust it to continue to contain a [[copy|copies]] of any annexed +file. This is necessary because you could run `borg delete` or `borg prune` +and remove the copy from the borg repository. If you choose to set the +trust level of the borg repository to a higher level, you need to avoid +using such commands with that borg repository. + +Consider this example: + + git-annex add annexedfile + borg create /path/to/borgrepo `pwd`::foo + git-annex sync borg + git-annex semitrust borg + git-annex drop annexedfile + +Now the only copy of annexedfile is in the borg repository. + + borg create /path/to/borgrepo `pwd`::bar + borg delete /path/to/borgrepo::foo + git-annex sync borg + git-annex whereis annexedfile + +Now no copies of annexfile remain, because the "foo" archive +in the borg repository was the only one to contain it, and it was deleted. + +So either keep the borg special remote as untrusted, and use such borg +commands to delete old archives as needed, or avoid using `borg delete` +and `borg prune`, and then the remote can safely be made semitrusted or +trusted. + +Also, if you do choose to delete old archives, make sure to never reuse +that archive name for a new archive. git-annex may think it's the same +archive it saw before, and not notice the change. diff --git a/doc/todo/borg_sync_tree_not_grafted.mdwn b/doc/todo/borg_sync_tree_not_grafted.mdwn new file mode 100644 index 0000000000..a4423706e1 --- /dev/null +++ b/doc/todo/borg_sync_tree_not_grafted.mdwn @@ -0,0 +1,5 @@ +The tree generated by git-annex sync with a borg remote +does not seem to get grafted into the git-annex branch, so +would be subject to being lost to GC. + +Is this a general problem affecting importtree too? diff --git a/doc/todo/sync_--content_with_borg_does_not_get_content.mdwn b/doc/todo/sync_--content_with_borg_does_not_get_content.mdwn new file mode 100644 index 0000000000..a8755fd3d2 --- /dev/null +++ b/doc/todo/sync_--content_with_borg_does_not_get_content.mdwn @@ -0,0 +1,2 @@ +Subject says it all really, sync does not try to get content +from remotes that are thirdPartyPopulated yet. diff --git a/git-annex.cabal b/git-annex.cabal index 48a1ed5d5c..91bf59d44d 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -940,6 +940,7 @@ Executable git-annex Remote Remote.Adb Remote.BitTorrent + Remote.Borg Remote.Bup Remote.Ddar Remote.Directory @@ -962,6 +963,7 @@ Executable git-annex Remote.Helper.Messages Remote.Helper.P2P Remote.Helper.ReadOnly + Remote.Helper.ThirdPartyPopulated Remote.Helper.Special Remote.Helper.Ssh Remote.HttpAlso