include borg archive name in tree, use empty ContentIdentifier
It's unusual to use a ContentIdentifier that is not semi-unique for different contents. Note that in importKeys, it checks if a content identifier is one that's known before, to avoid downloading the same content twice. But that's done in a code path not used for borg repos, because they are thirdpartypopulated.
This commit is contained in:
parent
c2d6f335a6
commit
7f7094a7cb
1 changed files with 37 additions and 17 deletions
|
@ -26,10 +26,14 @@ import Text.Read
|
|||
import Control.Exception (evaluate)
|
||||
import Control.DeepSeq
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified System.FilePath.ByteString as P
|
||||
|
||||
type BorgRepo = String
|
||||
|
||||
type BorgArchiveName = S.ByteString
|
||||
|
||||
remote :: RemoteType
|
||||
remote = RemoteType
|
||||
{ typename = "borg"
|
||||
|
@ -123,15 +127,8 @@ borgLocal = notElem ':'
|
|||
|
||||
-- TODO avoid rescanning archives that have already been scanned
|
||||
--
|
||||
-- XXX importableHistory should probably not be populated. git-annex
|
||||
-- only stores and uses the most recent imported tree, not the whole history,
|
||||
-- I think. So a key that's only in a previous archive would not have
|
||||
-- a known ImportLocation when retrieving it.
|
||||
-- Instead, maybe need to include the archive names at the top of the
|
||||
-- importlocation? (Then would not need them in the ContentIdentifier.)
|
||||
--
|
||||
-- XXX the tree generated by using this does not seem to get grafted into
|
||||
-- the git-annex branch, so would be subject to being lost to GC>
|
||||
-- the git-annex branch, so would be subject to being lost to GC.
|
||||
-- Is this a general problem affecting importtree too?
|
||||
listImportableContentsM :: BorgRepo -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM borgrepo = prompt $ do
|
||||
|
@ -140,7 +137,7 @@ listImportableContentsM borgrepo = prompt $ do
|
|||
let archive' = borgrepo ++ "::" ++ decodeBS' archive
|
||||
in withborglist archive' "{size}{NUL}{path}{NUL}" $
|
||||
liftIO . evaluate . force . parsefilelist archive
|
||||
return (Just (mkimportablecontents (reverse ls)))
|
||||
return (Just (mkimportablecontents ls))
|
||||
where
|
||||
withborglist what format a = do
|
||||
let p = (proc "borg" ["list", what, "--format", format])
|
||||
|
@ -158,26 +155,49 @@ listImportableContentsM borgrepo = prompt $ do
|
|||
parsefilelist archive (bsz:f:rest) = case readMaybe (fromRawFilePath bsz) of
|
||||
Nothing -> parsefilelist archive rest
|
||||
Just sz ->
|
||||
let loc = ThirdPartyPopulated.mkThirdPartyImportLocation f
|
||||
let loc = genImportLocation archive f
|
||||
-- This does a little unncessary work to parse the
|
||||
-- key, which is then thrown away. But, it lets the
|
||||
-- file list be shrank down to only the ones that are
|
||||
-- importable keys, so avoids needing to buffer all
|
||||
-- the rest of the files in memory.
|
||||
in case ThirdPartyPopulated.importKey' loc sz of
|
||||
Just k -> (loc, (ContentIdentifier archive, sz))
|
||||
Just k -> (loc, (borgContentIdentifier, sz))
|
||||
: parsefilelist archive rest
|
||||
Nothing -> parsefilelist archive rest
|
||||
parsefilelist _ _ = []
|
||||
|
||||
mkimportablecontents [] = ImportableContents
|
||||
{ importableContents = []
|
||||
-- importableHistory is not used for retrieval, so is not
|
||||
-- populated with old archives. Instead, a tree of archives
|
||||
-- is constructed, by genImportLocation including the archive
|
||||
-- name in the ImportLocation.
|
||||
mkimportablecontents (l) = ImportableContents
|
||||
{ importableContents = concat l
|
||||
, importableHistory = []
|
||||
}
|
||||
mkimportablecontents (v:vs) = ImportableContents
|
||||
{ importableContents = v
|
||||
, importableHistory = [mkimportablecontents vs]
|
||||
}
|
||||
|
||||
-- Borg does not allow / in the name of an archive, so the archive
|
||||
-- name will always be the first directory in the ImportLocation.
|
||||
--
|
||||
-- Paths in a borg archive are always relative, not absolute, so the use of
|
||||
-- </> to combine the archive name with the path will always work.
|
||||
genImportLocation :: BorgArchiveName -> RawFilePath -> ImportLocation
|
||||
genImportLocation archivename p =
|
||||
ThirdPartyPopulated.mkThirdPartyImportLocation $
|
||||
archivename P.</> p
|
||||
|
||||
extractImportLocation :: ImportLocation -> (BorgArchiveName, RawFilePath)
|
||||
extractImportLocation loc = go $ P.splitDirectories $
|
||||
ThirdPartyPopulated.fromThirdPartyImportLocation loc
|
||||
where
|
||||
go (archivename:rest) = (archivename, P.joinPath rest)
|
||||
go _ = giveup $ "Unable to parse import location " ++ fromRawFilePath (fromImportLocation loc)
|
||||
|
||||
-- We do not need a ContentIdentifier in order to retrieve a file from
|
||||
-- borg; the ImportLocation contains all that's needed. So, this is left
|
||||
-- empty.
|
||||
borgContentIdentifier :: ContentIdentifier
|
||||
borgContentIdentifier = ContentIdentifier mempty
|
||||
|
||||
retrieveExportWithContentIdentifierM :: BorgRepo -> ImportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
|
||||
retrieveExportWithContentIdentifierM borgrepo loc cid dest k p = error "TODO"
|
||||
|
|
Loading…
Add table
Reference in a new issue