git-annex/Remote/Helper/ThirdPartyPopulated.hs

86 lines
3.3 KiB
Haskell
Raw Normal View History

{- Helpers for thirdPartyPopulated remotes
-
- Copyright 2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Remote.Helper.ThirdPartyPopulated where
import Annex.Common
import Types.Remote
import Types.Import
import Crypto (isEncKey)
import Utility.Metered
import qualified System.FilePath.ByteString as P
import qualified Data.ByteString as S
-- When a remote is thirdPartyPopulated, the files we want are probably
-- in the .git directory. But, git does not really support .git in paths
-- in a git tree. (Such a tree can be built, but it will lead to problems.)
-- And so anything in .git is prevented from being imported.
-- To work around that, this renames that directory when generating an
-- ImportLocation.
mkThirdPartyImportLocation :: RawFilePath -> ImportLocation
mkThirdPartyImportLocation =
mkImportLocation . P.joinPath . map esc . P.splitDirectories
where
esc ".git" = "dotgit"
esc x
| "dotgit" `S.isSuffixOf` x = "dot" <> x
| otherwise = x
fromThirdPartyImportLocation :: ImportLocation -> RawFilePath
fromThirdPartyImportLocation =
P.joinPath . map unesc . P.splitDirectories . fromImportLocation
where
unesc "dotgit" = ".git"
unesc x
| "dotgit" `S.isSuffixOf` x = S.drop 3 x
| otherwise = x
-- When a remote is thirdPartyPopulated, and contains a backup of a
-- git-annex repository or some special remotes, this can be used to
-- find only those ImportLocations that are annex object files.
-- All other ImportLocations are ignored.
importKey :: ImportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> Annex (Maybe Key)
importKey loc _cid sz _ = return $ importKey' (fromImportLocation loc) (Just sz)
importKey' :: RawFilePath -> Maybe ByteSize -> Maybe Key
importKey' p msz = case fileKey f of
Just k
-- Annex objects always are in a subdirectory with the same
-- name as the filename. If this is not the case for the file
-- that was backed up, it is probably not a valid annex object.
-- Eg, it could be something in annex/bad/, or annex/tmp/.
-- Or it could be a file that only happens to have a name
-- like an annex object.
-- (This does unfortunately prevent recognizing files that are
-- part of special remotes that don't use that layout. The most
-- likely special remote to be in a backup, the directory
-- special remote, does use that layout at least.)
| lastMaybe (P.splitDirectories (P.dropFileName p)) /= Just f -> Nothing
-- Chunked or encrypted keys used in special remotes are not
-- supported.
| isChunkKey k || isEncKey k -> Nothing
-- Check that the size of the key is the same as the size of the
-- file stored in the backup. This is a cheap way to make sure it's
-- probabably the actual content of the file. We don't fully
-- verify the content here because that could be a very
-- expensive operation for a large repository; if the user
-- wants to detect every possible data corruption problem
-- (eg, wrong data read off disk during backup, or the object
-- was corrupt in the git-annex repo and that bad object got
-- backed up), they can fsck the remote.
| otherwise -> case (msz, fromKey keySize k) of
(Just sz, Just sz')
| sz' == sz -> Just k
| otherwise -> Nothing
_ -> Just k
Nothing -> Nothing
where
f = P.takeFileName p