bcd55b365c
Still some issues to deal with, see TODO and XXX. Here's what gets logged, for each key: cid log: 1608582045.832799227s 6720ebad-b20e-4460-a8f2-2477361aea75 !MjAyMC0xMi0yMVQxMTozMzoxNw==:!MjAyMC0xMi0yMVQxMzowNzoyNg== The "!Mj" are base64 encoded borg archive names, since mine were dates and contained some characters not allowed in cid logs unescaped. There were archives that each contained the key. This list will grow as more borg backups are done and learned about. tree generated: 120000 blob 5ef6a4615c084819b44cd4e3a31657664ddf643b x/dotgit/annex/objects/06/mv/SHA256E-s30--a5d8532e64ec28f5491e25e7a6c1cb68f80507c1be6c1b35f8ec53d25413e5da/SHA256E-s30--a5d8532e64ec28f5491e25e7a6c1cb68f80507c1be6c1b35f8ec53d25413e5da 120000 blob 063a139d3021c8db60f5c576d29fada2b824d91c x/dotgit/annex/objects/72/PP/SHA256E-s30--e80b09a854b4e4d99a76caaa6983b34272480e0b4fdb95d04234a54b4849b893/SHA256E-s30--e80b09a854b4e4d99a76caaa6983b34272480e0b4fdb95d04234a54b4849b893 120000 blob b53b54916fd6abf21fedf796deca08d5ac7a75af x/dotgit/annex/objects/Ww/pk/SHA256E-s30--6aac072a8ebf02a5807c4f15e77ed585a6c87b3b333ba625a3c8d6b4dc50a9f2/SHA256E-s30--6aac072a8ebf02a5807c4f15e77ed585a6c87b3b333ba625a3c8d6b4dc50a9f2 This commit was sponsored by Denis Dzyubenko on Patreon.
86 lines
3.3 KiB
Haskell
86 lines
3.3 KiB
Haskell
{- Helpers for thirdPartyPopulated remotes
 -
 - Copyright 2020 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}
|
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module Remote.Helper.ThirdPartyPopulated where
|
|
|
|
import Annex.Common
|
|
import Types.Remote
|
|
import Types.Import
|
|
import Crypto (isEncKey)
|
|
import Utility.Metered
|
|
|
|
import qualified System.FilePath.ByteString as P
|
|
import qualified Data.ByteString as S
|
|
|
|
-- Builds the ImportLocation for a file found on a thirdPartyPopulated
-- remote.
--
-- The files of interest on such a remote are probably inside a .git
-- directory. However, git does not really support .git as a component
-- of a path in a tree (such a tree can be built, but it will lead to
-- problems), and so anything in .git is prevented from being imported.
--
-- As a workaround, each ".git" path component is renamed to "dotgit".
-- A component that already ends in "dotgit" gets an extra "dot"
-- prefix, which keeps it distinct from a renamed ".git" and lets
-- fromThirdPartyImportLocation undo the renaming.
mkThirdPartyImportLocation :: RawFilePath -> ImportLocation
mkThirdPartyImportLocation path =
    mkImportLocation (P.joinPath escaped)
  where
    escaped = map escapeComponent (P.splitDirectories path)

    escapeComponent c
        | c == ".git" = "dotgit"
        | "dotgit" `S.isSuffixOf` c = "dot" <> c
        | otherwise = c
|
|
|
|
-- Recovers the path on the remote from an ImportLocation that was
-- built by mkThirdPartyImportLocation.
--
-- A path component that is exactly "dotgit" becomes ".git" again.
-- Any other component ending in "dotgit" has its first 3 bytes
-- removed, which strips the "dot" prefix that was added when the
-- location was generated. All other components pass through unchanged.
fromThirdPartyImportLocation :: ImportLocation -> RawFilePath
fromThirdPartyImportLocation loc =
    P.joinPath
        [ unescapeComponent c
        | c <- P.splitDirectories (fromImportLocation loc)
        ]
  where
    unescapeComponent c
        | c == "dotgit" = ".git"
        | "dotgit" `S.isSuffixOf` c = S.drop 3 c
        | otherwise = c
|
|
|
|
-- For a thirdPartyPopulated remote that holds a backup of a git-annex
-- repository or of some special remotes: picks out only the
-- ImportLocations that are annex object files, yielding Nothing for
-- every other ImportLocation so that it gets ignored.
--
-- The ContentIdentifier and MeterUpdate arguments are not used; the
-- decision is made purely by importKey' from the location and the size.
importKey
    :: ImportLocation
    -> ContentIdentifier
    -> ByteSize
    -> MeterUpdate
    -> Annex (Maybe Key)
importKey loc _cid sz _meterupdate = pure (importKey' loc sz)
|
|
|
|
-- Pure worker for importKey: decides whether the file at an
-- ImportLocation, with the given size, is an annex object, and if so
-- which Key it holds.
--
-- The file name has to deserialize to a key, and then three checks
-- all have to pass; otherwise the result is Nothing.
importKey' :: ImportLocation -> ByteSize -> Maybe Key
importKey' loc sz = do
    key <- deserializeKey' filename
    if inObjectDirectory && supported key && sizeMatches key
        then Just key
        else Nothing
  where
    path = fromImportLocation loc
    filename = P.takeFileName path

    -- Annex objects always live in a subdirectory that has the same
    -- name as the file itself. A file that was backed up from some
    -- other place is probably not a valid annex object: it could be
    -- something from annex/bad/ or annex/tmp/, or a file that merely
    -- happens to be named like an annex object.
    -- (Unfortunately this also rejects files belonging to special
    -- remotes that do not use that layout. The directory special
    -- remote, which is the one most likely to appear in a backup,
    -- does use it at least.)
    inObjectDirectory =
        lastMaybe (P.splitDirectories (P.dropFileName path)) == Just filename

    -- Chunked or encrypted keys, as used in special remotes, are not
    -- supported.
    supported key = not (isChunkKey key || isEncKey key)

    -- When the key records a size, it must equal the size of the file
    -- stored in the backup. This is a cheap way to make sure the file
    -- is probably the actual content of the key. The content is not
    -- fully verified here, because that could be a very expensive
    -- operation for a large repository. A user who wants to detect
    -- every possible data corruption problem (eg, wrong data read off
    -- disk during the backup, or an object that was already corrupt in
    -- the git-annex repo and got backed up that way) can fsck the
    -- remote. A key without a recorded size is accepted as is.
    sizeMatches key = maybe True (== sz) (fromKey keySize key)
|