use GIT keys for export of non-annexed files

This solves the problem that import of such files gets confused and
converts them back to annexed files.

The import code already used GIT keys internally when it determined a
file should not be annexed. So now when it sees a GIT key that export
used, it already does the right thing.

This also means that even older version of git-annex can import and will
do the right thing, once a fixed version has exported. Still, there may
be other complications around upgrades; still need to think it all
through.

Moved gitShaKey and keyGitSha from Key to Annex.Export since they're
only used for export/import.

Documented GIT keys in backends, since they do appear in the git-annex
branch now.

This commit was sponsored by Graham Spencer on Patreon.
This commit is contained in:
Joey Hess 2021-03-05 14:03:51 -04:00
parent deac6f12b5
commit fc61915230
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 80 additions and 88 deletions

View file

@ -1,10 +1,12 @@
{- git-annex exports
-
- Copyright 2017 Joey Hess <id@joeyh.name>
- Copyright 2017-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Export where
import Annex
@ -15,31 +17,36 @@ import qualified Git
import qualified Types.Remote as Remote
import Messages
import Control.Applicative
import Data.Maybe
import Prelude
-- An export includes both annexed files and files stored in git.
-- For the latter, a SHA1 key is synthesized.
data ExportKey = AnnexKey Key | GitKey Key
deriving (Show, Eq, Ord)
asKey :: ExportKey -> Key
asKey (AnnexKey k) = k
asKey (GitKey k) = k
exportKey :: Git.Sha -> Annex ExportKey
-- From a sha pointing to the content of a file to the key
-- to use to export it. When the file is annexed, it's the annexed key.
-- When the file is stored in git, it's a special type of key to indicate
-- that.
exportKey :: Git.Sha -> Annex Key
exportKey sha = mk <$> catKey sha
where
mk (Just k) = AnnexKey k
mk Nothing = GitKey $ mkKey $ \k -> k
{ keyName = Git.fromRef' sha
, keyVariety = SHA1Key (HasExt False)
, keySize = Nothing
, keyMtime = Nothing
, keyChunkSize = Nothing
, keyChunkNum = Nothing
}
mk (Just k) = k
mk Nothing = gitShaKey sha
-- Encodes a git sha as a key. This is used to represent a non-annexed
-- file that is stored on a special remote, which necessarily needs a
-- key.
--
-- This is not the same as a SHA1 key, because the mapping needs to be
-- bijective, also because git may not always use SHA1, and because git
-- takes a SHA1 of the file size + content, while git-annex SHA1 keys
-- only checksum the content.
gitShaKey :: Git.Sha -> Key
gitShaKey (Git.Ref s) = mkKey $ \kd -> kd
{ keyName = s
, keyVariety = OtherKey "GIT"
}
-- Reverse of gitShaKey
keyGitSha :: Key -> Maybe Git.Sha
keyGitSha k
| fromKey keyVariety k == OtherKey "GIT" =
Just (Git.Ref (fromKey keyName k))
| otherwise = Nothing
warnExportImportConflict :: Remote -> Annex ()
warnExportImportConflict r = do

View file

@ -179,10 +179,11 @@ recordImportTree remote importtreeconfig importable = do
updatelocationlog oldexport finaltree = do
let stillpresent db k = liftIO $ not . null
<$> Export.getExportedLocation db k
let updater db oldkey _newkey _ = case oldkey of
Just (AnnexKey k) -> unlessM (stillpresent db k) $
logChange k (Remote.uuid remote) InfoMissing
Just (GitKey _) -> noop
let updater db moldkey _newkey _ = case moldkey of
Just oldkey -> case keyGitSha oldkey of
Nothing -> unlessM (stillpresent db oldkey) $
logChange oldkey (Remote.uuid remote) InfoMissing
Just _ -> noop
Nothing -> noop
db <- Export.openDb (Remote.uuid remote)
forM_ (exportedTreeishes oldexport) $ \oldtree ->

View file

@ -72,9 +72,9 @@ optParser _ = ExportOptions
-- To handle renames which swap files, the exported file is first renamed
-- to a stable temporary name based on the key.
exportTempName :: ExportKey -> ExportLocation
exportTempName :: Key -> ExportLocation
exportTempName ek = mkExportLocation $ toRawFilePath $
".git-annex-tmp-content-" ++ serializeKey (asKey (ek))
".git-annex-tmp-content-" ++ serializeKey ek
seek :: ExportOptions -> CommandSeek
seek o = startConcurrency commandStages $ do
@ -203,8 +203,8 @@ changeExport r db (PreferredFiltered new) = do
sequence_ $ map a diff
void $ liftIO cleanup
-- Map of old and new filenames for each changed ExportKey in a diff.
type DiffMap = M.Map ExportKey (Maybe TopFilePath, Maybe TopFilePath)
-- Map of old and new filenames for each changed Key in a diff.
type DiffMap = M.Map Key (Maybe TopFilePath, Maybe TopFilePath)
mkDiffMap :: Git.Ref -> Git.Ref -> ExportHandle -> Annex DiffMap
mkDiffMap old new db = do
@ -259,7 +259,7 @@ startExport r db cvar allfilledvar ti = do
ek <- exportKey (Git.LsTree.sha ti)
stopUnless (notrecordedpresent ek) $
starting ("export " ++ name r) ai si $
ifM (either (const False) id <$> tryNonAsync (checkPresentExport (exportActions r) (asKey ek) loc))
ifM (either (const False) id <$> tryNonAsync (checkPresentExport (exportActions r) ek loc))
( next $ cleanupExport r db ek loc False
, do
liftIO $ modifyMVar_ cvar (pure . const (FileUploaded True))
@ -272,39 +272,39 @@ startExport r db cvar allfilledvar ti = do
ai = ActionItemOther (Just (fromRawFilePath f))
si = SeekInput []
notrecordedpresent ek = (||)
<$> liftIO (notElem loc <$> getExportedLocation db (asKey ek))
<$> liftIO (notElem loc <$> getExportedLocation db ek)
-- If content was removed from the remote, the export db
-- will still list it, so also check location tracking.
<*> (notElem (uuid r) <$> loggedLocations (asKey ek))
<*> (notElem (uuid r) <$> loggedLocations ek)
performExport :: Remote -> ExportHandle -> ExportKey -> AssociatedFile -> Sha -> ExportLocation -> MVar AllFilled -> CommandPerform
performExport :: Remote -> ExportHandle -> Key -> AssociatedFile -> Sha -> ExportLocation -> MVar AllFilled -> CommandPerform
performExport r db ek af contentsha loc allfilledvar = do
let storer = storeExport (exportActions r)
sent <- tryNonAsync $ case ek of
AnnexKey k -> ifM (inAnnex k)
sent <- tryNonAsync $ case keyGitSha ek of
Nothing -> ifM (inAnnex ek)
( notifyTransfer Upload af $
-- alwaysUpload because the same key
-- could be used for more than one export
-- location, and concurrently uploading
-- of the content should still be allowed.
alwaysUpload (uuid r) k af Nothing stdRetry $ \pm -> do
alwaysUpload (uuid r) ek af Nothing stdRetry $ \pm -> do
let rollback = void $
performUnexport r db [ek] loc
sendAnnex k rollback $ \f ->
sendAnnex ek rollback $ \f ->
Remote.action $
storer f k loc pm
storer f ek loc pm
, do
showNote "not available"
return False
)
-- Sending a non-annexed file.
GitKey sha1k ->
Just _ ->
withTmpFile "export" $ \tmp h -> do
b <- catObject contentsha
liftIO $ L.hPut h b
liftIO $ hClose h
Remote.action $
storer tmp sha1k loc nullMeterUpdate
storer tmp ek loc nullMeterUpdate
let failedsend = liftIO $ modifyMVar_ allfilledvar (pure . const (AllFilled False))
case sent of
Right True -> next $ cleanupExport r db ek loc True
@ -315,11 +315,11 @@ performExport r db ek af contentsha loc allfilledvar = do
failedsend
throwM err
cleanupExport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> Bool -> CommandCleanup
cleanupExport :: Remote -> ExportHandle -> Key -> ExportLocation -> Bool -> CommandCleanup
cleanupExport r db ek loc sent = do
liftIO $ addExportedLocation db (asKey ek) loc
liftIO $ addExportedLocation db ek loc
when sent $
logChange (asKey ek) (uuid r) InfoPresent
logChange ek (uuid r) InfoPresent
return True
startUnexport :: Remote -> ExportHandle -> TopFilePath -> [Git.Sha] -> CommandStart
@ -335,7 +335,7 @@ startUnexport r db f shas = do
ai = ActionItemOther (Just (fromRawFilePath f'))
si = SeekInput []
startUnexport' :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
startUnexport' :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
startUnexport' r db f ek =
starting ("unexport " ++ name r) ai si $
performUnexport r db [ek] loc
@ -350,20 +350,20 @@ startUnexport' r db f ek =
-- remote is untrusted, so would not count as a copy anyway.
-- Or, an export may be appendonly, and removing a file from it does
-- not really remove the content, which must be accessible later on.
performUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandPerform
performUnexport :: Remote -> ExportHandle -> [Key] -> ExportLocation -> CommandPerform
performUnexport r db eks loc = do
ifM (allM rm eks)
( next $ cleanupUnexport r db eks loc
, stop
)
where
rm ek = Remote.action $ removeExport (exportActions r) (asKey ek) loc
rm ek = Remote.action $ removeExport (exportActions r) ek loc
cleanupUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandCleanup
cleanupUnexport :: Remote -> ExportHandle -> [Key] -> ExportLocation -> CommandCleanup
cleanupUnexport r db eks loc = do
liftIO $ do
forM_ eks $ \ek ->
removeExportedLocation db (asKey ek) loc
removeExportedLocation db ek loc
flushDbQueue db
-- An versionedExport remote supports removeExportLocation to remove
@ -371,12 +371,12 @@ cleanupUnexport r db eks loc = do
-- and allows retrieving it.
unless (versionedExport (exportActions r)) $ do
remaininglocs <- liftIO $
concat <$> forM eks (\ek -> getExportedLocation db (asKey ek))
concat <$> forM eks (getExportedLocation db)
when (null remaininglocs) $
forM_ eks $ \ek ->
logChange (asKey ek) (uuid r) InfoMissing
logChange ek (uuid r) InfoMissing
removeEmptyDirectories r db loc (map asKey eks)
removeEmptyDirectories r db loc eks
startRecoverIncomplete :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart
startRecoverIncomplete r db sha oldf
@ -387,12 +387,12 @@ startRecoverIncomplete r db sha oldf
let ai = ActionItemOther (Just (fromRawFilePath (fromExportLocation loc)))
let si = SeekInput []
starting ("unexport " ++ name r) ai si $ do
liftIO $ removeExportedLocation db (asKey ek) oldloc
liftIO $ removeExportedLocation db ek oldloc
performUnexport r db [ek] loc
where
oldloc = mkExportLocation $ getTopFilePath oldf
startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
startMoveToTempName r db f ek =
starting ("rename " ++ name r) ai si $
performRename r db ek loc tmploc
@ -403,11 +403,11 @@ startMoveToTempName r db f ek =
ai = ActionItemOther $ Just $ fromRawFilePath f' ++ " -> " ++ fromRawFilePath (fromExportLocation tmploc)
si = SeekInput []
startMoveFromTempName :: Remote -> ExportHandle -> ExportKey -> TopFilePath -> CommandStart
startMoveFromTempName :: Remote -> ExportHandle -> Key -> TopFilePath -> CommandStart
startMoveFromTempName r db ek f = do
let tmploc = exportTempName ek
let ai = ActionItemOther (Just (fromRawFilePath (fromExportLocation tmploc) ++ " -> " ++ fromRawFilePath f'))
stopUnless (liftIO $ elem tmploc <$> getExportedLocation db (asKey ek)) $
stopUnless (liftIO $ elem tmploc <$> getExportedLocation db ek) $
starting ("rename " ++ name r) ai si $
performRename r db ek tmploc loc
where
@ -415,9 +415,9 @@ startMoveFromTempName r db ek f = do
f' = getTopFilePath f
si = SeekInput []
performRename :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandPerform
performRename :: Remote -> ExportHandle -> Key -> ExportLocation -> ExportLocation -> CommandPerform
performRename r db ek src dest =
tryNonAsync (renameExport (exportActions r) (asKey ek) src dest) >>= \case
tryNonAsync (renameExport (exportActions r) ek src dest) >>= \case
Right (Just ()) -> next $ cleanupRename r db ek src dest
Left err -> do
warning $ "rename failed (" ++ show err ++ "); deleting instead"
@ -427,14 +427,14 @@ performRename r db ek src dest =
where
fallbackdelete = performUnexport r db [ek] src
cleanupRename :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandCleanup
cleanupRename :: Remote -> ExportHandle -> Key -> ExportLocation -> ExportLocation -> CommandCleanup
cleanupRename r db ek src dest = do
liftIO $ do
removeExportedLocation db (asKey ek) src
addExportedLocation db (asKey ek) dest
removeExportedLocation db ek src
addExportedLocation db ek dest
flushDbQueue db
if exportDirectories src /= exportDirectories dest
then removeEmptyDirectories r db src [asKey ek]
then removeEmptyDirectories r db src [ek]
else return True
-- | Remove empty directories from the export. Call after removing an

View file

@ -200,9 +200,9 @@ removeExportTree h k loc = queueDb h $
-- and updates state.
type ExportDiffUpdater
= ExportHandle
-> Maybe ExportKey
-> Maybe Key
-- ^ old exported key
-> Maybe ExportKey
-> Maybe Key
-- ^ new exported key
-> Git.DiffTree.DiffTreeItem
-> Annex ()
@ -214,10 +214,10 @@ mkExportDiffUpdater
mkExportDiffUpdater removeold addnew h srcek dstek i = do
case srcek of
Nothing -> return ()
Just k -> liftIO $ removeold h (asKey k) loc
Just k -> liftIO $ removeold h k loc
case dstek of
Nothing -> return ()
Just k -> liftIO $ addnew h (asKey k) loc
Just k -> liftIO $ addnew h k loc
where
loc = mkExportLocation $ getTopFilePath $ Git.DiffTree.file i

20
Key.hs
View file

@ -23,8 +23,6 @@ module Key (
nonChunkKey,
chunkKeyOffset,
isChunkKey,
gitShaKey,
keyGitSha,
isKeyPrefix,
splitKeyNameExtension,
@ -37,7 +35,6 @@ import qualified Data.Attoparsec.ByteString as A
import Common
import Types.Key
import Git.Types
import Utility.QuickCheck
import Utility.Bloom
import Utility.Aeson
@ -61,23 +58,6 @@ chunkKeyOffset k = (*)
isChunkKey :: Key -> Bool
isChunkKey k = isJust (fromKey keyChunkSize k) && isJust (fromKey keyChunkNum k)
-- Encodes a git sha as a key.
--
-- This is not the same as a SHA1 key, because the mapping needs to be
-- bijective, also because git may not always use SHA1.
gitShaKey :: Sha -> Key
gitShaKey (Ref s) = mkKey $ \kd -> kd
{ keyName = s
, keyVariety = OtherKey "GIT"
}
-- Reverse of gitShaKey
keyGitSha :: Key -> Maybe Sha
keyGitSha k
| fromKey keyVariety k == OtherKey "GIT" =
Just (Ref (fromKey keyName k))
| otherwise = Nothing
serializeKey :: Key -> String
serializeKey = decodeBS' . serializeKey'

View file

@ -76,6 +76,10 @@ content of an annexed file remains unchanged.
passing it to a shell script. The URL-backend key is distinct from URLs/URIs
that may be attached to a key (from any backend) indicating the key's location
on the web or in one of [[special_remotes]].
* `GIT` -- This is used internally by git-annex when exporting trees
containing files stored in git, rather than git-annex. It represents a
git sha. This is never used for git-annex links, but information about
keys of this type is stored in the git-annex branch.
## external backends