include borg archive name in tree, use empty ContentIdentifier

It's unusual to use a ContentIdentifier that is not semi-unique
for different contents. Note that in importKeys, it checks if a content
identifier is one that's known before, to avoid downloading the same
content twice. But that's done in a code path not used for borg repos,
because they are thirdpartypopulated.
This commit is contained in:
Joey Hess 2020-12-22 11:53:00 -04:00
parent c2d6f335a6
commit 7f7094a7cb
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38

View file

@ -26,10 +26,14 @@ import Text.Read
import Control.Exception (evaluate)
import Control.DeepSeq
import qualified Data.Map as M
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified System.FilePath.ByteString as P
type BorgRepo = String
type BorgArchiveName = S.ByteString
remote :: RemoteType
remote = RemoteType
{ typename = "borg"
@ -123,15 +127,8 @@ borgLocal = notElem ':'
-- TODO avoid rescanning archives that have already been scanned
--
-- XXX importableHistory should probably not be populated. git-annex
-- only stores and uses the most recent imported tree, not the whole history,
-- I think. So a key that's only in a previous archive would not have
-- a known ImportLocation when retrieving it.
-- Instead, maybe need to include the archive names at the top of the
-- importlocation? (Then would not need them in the ContentIdentifier.)
--
-- XXX the tree generated by using this does not seem to get grafted into
-- the git-annex branch, so would be subject to being lost to GC>
-- the git-annex branch, so would be subject to being lost to GC.
-- Is this a general problem affecting importtree too?
listImportableContentsM :: BorgRepo -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsM borgrepo = prompt $ do
@ -140,7 +137,7 @@ listImportableContentsM borgrepo = prompt $ do
let archive' = borgrepo ++ "::" ++ decodeBS' archive
in withborglist archive' "{size}{NUL}{path}{NUL}" $
liftIO . evaluate . force . parsefilelist archive
return (Just (mkimportablecontents (reverse ls)))
return (Just (mkimportablecontents ls))
where
withborglist what format a = do
let p = (proc "borg" ["list", what, "--format", format])
@ -158,26 +155,49 @@ listImportableContentsM borgrepo = prompt $ do
parsefilelist archive (bsz:f:rest) = case readMaybe (fromRawFilePath bsz) of
Nothing -> parsefilelist archive rest
Just sz ->
let loc = ThirdPartyPopulated.mkThirdPartyImportLocation f
let loc = genImportLocation archive f
-- This does a little unncessary work to parse the
-- key, which is then thrown away. But, it lets the
-- file list be shrank down to only the ones that are
-- importable keys, so avoids needing to buffer all
-- the rest of the files in memory.
in case ThirdPartyPopulated.importKey' loc sz of
Just k -> (loc, (ContentIdentifier archive, sz))
Just k -> (loc, (borgContentIdentifier, sz))
: parsefilelist archive rest
Nothing -> parsefilelist archive rest
parsefilelist _ _ = []
mkimportablecontents [] = ImportableContents
{ importableContents = []
-- importableHistory is not used for retrieval, so is not
-- populated with old archives. Instead, a tree of archives
-- is constructed, by genImportLocation including the archive
-- name in the ImportLocation.
mkimportablecontents (l) = ImportableContents
{ importableContents = concat l
, importableHistory = []
}
mkimportablecontents (v:vs) = ImportableContents
{ importableContents = v
, importableHistory = [mkimportablecontents vs]
}
-- Borg does not allow / in the name of an archive, so the archive
-- name will always be the first directory in the ImportLocation.
--
-- Paths in a borg archive are always relative, not absolute, so the use of
-- </> to combine the archive name with the path will always work.
genImportLocation :: BorgArchiveName -> RawFilePath -> ImportLocation
genImportLocation archivename p =
ThirdPartyPopulated.mkThirdPartyImportLocation $
archivename P.</> p
extractImportLocation :: ImportLocation -> (BorgArchiveName, RawFilePath)
extractImportLocation loc = go $ P.splitDirectories $
ThirdPartyPopulated.fromThirdPartyImportLocation loc
where
go (archivename:rest) = (archivename, P.joinPath rest)
go _ = giveup $ "Unable to parse import location " ++ fromRawFilePath (fromImportLocation loc)
-- We do not need a ContentIdentifier in order to retrieve a file from
-- borg; the ImportLocation contains all that's needed. So, this is left
-- empty.
borgContentIdentifier :: ContentIdentifier
borgContentIdentifier = ContentIdentifier mempty
retrieveExportWithContentIdentifierM :: BorgRepo -> ImportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
retrieveExportWithContentIdentifierM borgrepo loc cid dest k p = error "TODO"