{- Helpers for thirdPartyPopulated remotes
 -
 - Copyright 2020 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE OverloadedStrings #-}

module Remote.Helper.ThirdPartyPopulated where

import Annex.Common
import Types.Remote
import Types.Import
import Crypto (isEncKey)
import Utility.Metered

import qualified System.FilePath.ByteString as P
import qualified Data.ByteString as S

-- When a remote is thirdPartyPopulated, the files we want are probably
-- in the .git directory. But, git does not really support .git in paths
-- in a git tree. (Such a tree can be built, but it will lead to problems.)
-- And so anything in .git is prevented from being imported.
-- To work around that, this renames that directory when generating an
-- ImportLocation.
mkThirdPartyImportLocation :: RawFilePath -> ImportLocation
mkThirdPartyImportLocation =
	mkImportLocation . P.joinPath . map esc . P.splitDirectories
  where
	esc ".git" = "dotgit"
	esc x
		| "dotgit" `S.isSuffixOf` x = "dot" <> x
		| otherwise = x

fromThirdPartyImportLocation :: ImportLocation -> RawFilePath
fromThirdPartyImportLocation =
	P.joinPath . map unesc . P.splitDirectories . fromImportLocation
  where
	unesc "dotgit" = ".git"
	unesc x
		| "dotgit" `S.isSuffixOf` x = S.drop 3 x
		| otherwise = x

-- When a remote is thirdPartyPopulated, and contains a backup of a
-- git-annex repository or some special remotes, this can be used to
-- find only those ImportLocations that are annex object files.
-- All other ImportLocations are ignored.
importKey :: ImportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> Annex (Maybe Key)
importKey loc _cid sz _ = return $ importKey' (fromImportLocation loc) (Just sz)

importKey' :: RawFilePath -> Maybe ByteSize -> Maybe Key
importKey' p msz = case fileKey f of
	Just k
		-- Annex objects always are in a subdirectory with the same
		-- name as the filename. If this is not the case for the file
		-- that was backed up, it is probably not a valid annex object.
		-- Eg, it could be something in annex/bad/, or annex/tmp/.
		-- Or it could be a file that only happens to have a name
		-- like an annex object.
		-- (This does unfortunately prevent recognizing files that are
		-- part of special remotes that don't use that layout. The most
		-- likely special remote to be in a backup, the directory
		-- special remote, does use that layout at least.)
		| lastMaybe (P.splitDirectories (P.dropFileName p)) /= Just f -> Nothing
		-- Chunked or encrypted keys used in special remotes are not
		-- supported.
		| isChunkKey k || isEncKey k -> Nothing
		-- Check that the size of the key is the same as the size of the
		-- file stored in the backup. This is a cheap way to make sure it's
		-- probabably the actual content of the file. We don't fully
		-- verify the content here because that could be a very 
		-- expensive operation for a large repository; if the user
		-- wants to detect every possible data corruption problem
		-- (eg, wrong data read off disk during backup, or the object
		-- was corrupt in the git-annex repo and that bad object got
		-- backed up), they can fsck the remote.
		| otherwise -> case (msz, fromKey keySize k) of
			(Just sz, Just sz')
				| sz' == sz -> Just k
				| otherwise -> Nothing
			_ -> Just k
	Nothing -> Nothing
  where
	f = P.takeFileName p