use GIT keys for export of non-annexed files

This solves the problem that import of such files gets confused and
converts them back to annexed files.

The import code already used GIT keys internally when it determined a
file should not be annexed. So now when it sees a GIT key that export
used, it already does the right thing.

This also means that even older version of git-annex can import and will
do the right thing, once a fixed version has exported. Still, there may
be other complications around upgrades; still need to think it all
through.

Moved gitShaKey and keyGitSha from Key to Annex.Export since they're
only used for export/import.

Documented GIT keys in backends, since they do appear in the git-annex
branch now.

This commit was sponsored by Graham Spencer on Patreon.
This commit is contained in:
Joey Hess 2021-03-05 14:03:51 -04:00
parent deac6f12b5
commit fc61915230
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 80 additions and 88 deletions

View file

@ -1,10 +1,12 @@
{- git-annex exports {- git-annex exports
- -
- Copyright 2017 Joey Hess <id@joeyh.name> - Copyright 2017-2021 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Export where module Annex.Export where
import Annex import Annex
@ -15,31 +17,36 @@ import qualified Git
import qualified Types.Remote as Remote import qualified Types.Remote as Remote
import Messages import Messages
import Control.Applicative -- From a sha pointing to the content of a file to the key
import Data.Maybe -- to use to export it. When the file is annexed, it's the annexed key.
import Prelude -- When the file is stored in git, it's a special type of key to indicate
-- that.
-- An export includes both annexed files and files stored in git. exportKey :: Git.Sha -> Annex Key
-- For the latter, a SHA1 key is synthesized.
data ExportKey = AnnexKey Key | GitKey Key
deriving (Show, Eq, Ord)
asKey :: ExportKey -> Key
asKey (AnnexKey k) = k
asKey (GitKey k) = k
exportKey :: Git.Sha -> Annex ExportKey
exportKey sha = mk <$> catKey sha exportKey sha = mk <$> catKey sha
where where
mk (Just k) = AnnexKey k mk (Just k) = k
mk Nothing = GitKey $ mkKey $ \k -> k mk Nothing = gitShaKey sha
{ keyName = Git.fromRef' sha
, keyVariety = SHA1Key (HasExt False) -- Encodes a git sha as a key. This is used to represent a non-annexed
, keySize = Nothing -- file that is stored on a special remote, which necessarily needs a
, keyMtime = Nothing -- key.
, keyChunkSize = Nothing --
, keyChunkNum = Nothing -- This is not the same as a SHA1 key, because the mapping needs to be
} -- bijective, also because git may not always use SHA1, and because git
-- takes a SHA1 of the file size + content, while git-annex SHA1 keys
-- only checksum the content.
gitShaKey :: Git.Sha -> Key
gitShaKey (Git.Ref s) = mkKey $ \kd -> kd
{ keyName = s
, keyVariety = OtherKey "GIT"
}
-- Reverse of gitShaKey
keyGitSha :: Key -> Maybe Git.Sha
keyGitSha k
| fromKey keyVariety k == OtherKey "GIT" =
Just (Git.Ref (fromKey keyName k))
| otherwise = Nothing
warnExportImportConflict :: Remote -> Annex () warnExportImportConflict :: Remote -> Annex ()
warnExportImportConflict r = do warnExportImportConflict r = do

View file

@ -179,10 +179,11 @@ recordImportTree remote importtreeconfig importable = do
updatelocationlog oldexport finaltree = do updatelocationlog oldexport finaltree = do
let stillpresent db k = liftIO $ not . null let stillpresent db k = liftIO $ not . null
<$> Export.getExportedLocation db k <$> Export.getExportedLocation db k
let updater db oldkey _newkey _ = case oldkey of let updater db moldkey _newkey _ = case moldkey of
Just (AnnexKey k) -> unlessM (stillpresent db k) $ Just oldkey -> case keyGitSha oldkey of
logChange k (Remote.uuid remote) InfoMissing Nothing -> unlessM (stillpresent db oldkey) $
Just (GitKey _) -> noop logChange oldkey (Remote.uuid remote) InfoMissing
Just _ -> noop
Nothing -> noop Nothing -> noop
db <- Export.openDb (Remote.uuid remote) db <- Export.openDb (Remote.uuid remote)
forM_ (exportedTreeishes oldexport) $ \oldtree -> forM_ (exportedTreeishes oldexport) $ \oldtree ->

View file

@ -72,9 +72,9 @@ optParser _ = ExportOptions
-- To handle renames which swap files, the exported file is first renamed -- To handle renames which swap files, the exported file is first renamed
-- to a stable temporary name based on the key. -- to a stable temporary name based on the key.
exportTempName :: ExportKey -> ExportLocation exportTempName :: Key -> ExportLocation
exportTempName ek = mkExportLocation $ toRawFilePath $ exportTempName ek = mkExportLocation $ toRawFilePath $
".git-annex-tmp-content-" ++ serializeKey (asKey (ek)) ".git-annex-tmp-content-" ++ serializeKey ek
seek :: ExportOptions -> CommandSeek seek :: ExportOptions -> CommandSeek
seek o = startConcurrency commandStages $ do seek o = startConcurrency commandStages $ do
@ -203,8 +203,8 @@ changeExport r db (PreferredFiltered new) = do
sequence_ $ map a diff sequence_ $ map a diff
void $ liftIO cleanup void $ liftIO cleanup
-- Map of old and new filenames for each changed ExportKey in a diff. -- Map of old and new filenames for each changed Key in a diff.
type DiffMap = M.Map ExportKey (Maybe TopFilePath, Maybe TopFilePath) type DiffMap = M.Map Key (Maybe TopFilePath, Maybe TopFilePath)
mkDiffMap :: Git.Ref -> Git.Ref -> ExportHandle -> Annex DiffMap mkDiffMap :: Git.Ref -> Git.Ref -> ExportHandle -> Annex DiffMap
mkDiffMap old new db = do mkDiffMap old new db = do
@ -259,7 +259,7 @@ startExport r db cvar allfilledvar ti = do
ek <- exportKey (Git.LsTree.sha ti) ek <- exportKey (Git.LsTree.sha ti)
stopUnless (notrecordedpresent ek) $ stopUnless (notrecordedpresent ek) $
starting ("export " ++ name r) ai si $ starting ("export " ++ name r) ai si $
ifM (either (const False) id <$> tryNonAsync (checkPresentExport (exportActions r) (asKey ek) loc)) ifM (either (const False) id <$> tryNonAsync (checkPresentExport (exportActions r) ek loc))
( next $ cleanupExport r db ek loc False ( next $ cleanupExport r db ek loc False
, do , do
liftIO $ modifyMVar_ cvar (pure . const (FileUploaded True)) liftIO $ modifyMVar_ cvar (pure . const (FileUploaded True))
@ -272,39 +272,39 @@ startExport r db cvar allfilledvar ti = do
ai = ActionItemOther (Just (fromRawFilePath f)) ai = ActionItemOther (Just (fromRawFilePath f))
si = SeekInput [] si = SeekInput []
notrecordedpresent ek = (||) notrecordedpresent ek = (||)
<$> liftIO (notElem loc <$> getExportedLocation db (asKey ek)) <$> liftIO (notElem loc <$> getExportedLocation db ek)
-- If content was removed from the remote, the export db -- If content was removed from the remote, the export db
-- will still list it, so also check location tracking. -- will still list it, so also check location tracking.
<*> (notElem (uuid r) <$> loggedLocations (asKey ek)) <*> (notElem (uuid r) <$> loggedLocations ek)
performExport :: Remote -> ExportHandle -> ExportKey -> AssociatedFile -> Sha -> ExportLocation -> MVar AllFilled -> CommandPerform performExport :: Remote -> ExportHandle -> Key -> AssociatedFile -> Sha -> ExportLocation -> MVar AllFilled -> CommandPerform
performExport r db ek af contentsha loc allfilledvar = do performExport r db ek af contentsha loc allfilledvar = do
let storer = storeExport (exportActions r) let storer = storeExport (exportActions r)
sent <- tryNonAsync $ case ek of sent <- tryNonAsync $ case keyGitSha ek of
AnnexKey k -> ifM (inAnnex k) Nothing -> ifM (inAnnex ek)
( notifyTransfer Upload af $ ( notifyTransfer Upload af $
-- alwaysUpload because the same key -- alwaysUpload because the same key
-- could be used for more than one export -- could be used for more than one export
-- location, and concurrently uploading -- location, and concurrently uploading
-- of the content should still be allowed. -- of the content should still be allowed.
alwaysUpload (uuid r) k af Nothing stdRetry $ \pm -> do alwaysUpload (uuid r) ek af Nothing stdRetry $ \pm -> do
let rollback = void $ let rollback = void $
performUnexport r db [ek] loc performUnexport r db [ek] loc
sendAnnex k rollback $ \f -> sendAnnex ek rollback $ \f ->
Remote.action $ Remote.action $
storer f k loc pm storer f ek loc pm
, do , do
showNote "not available" showNote "not available"
return False return False
) )
-- Sending a non-annexed file. -- Sending a non-annexed file.
GitKey sha1k -> Just _ ->
withTmpFile "export" $ \tmp h -> do withTmpFile "export" $ \tmp h -> do
b <- catObject contentsha b <- catObject contentsha
liftIO $ L.hPut h b liftIO $ L.hPut h b
liftIO $ hClose h liftIO $ hClose h
Remote.action $ Remote.action $
storer tmp sha1k loc nullMeterUpdate storer tmp ek loc nullMeterUpdate
let failedsend = liftIO $ modifyMVar_ allfilledvar (pure . const (AllFilled False)) let failedsend = liftIO $ modifyMVar_ allfilledvar (pure . const (AllFilled False))
case sent of case sent of
Right True -> next $ cleanupExport r db ek loc True Right True -> next $ cleanupExport r db ek loc True
@ -315,11 +315,11 @@ performExport r db ek af contentsha loc allfilledvar = do
failedsend failedsend
throwM err throwM err
cleanupExport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> Bool -> CommandCleanup cleanupExport :: Remote -> ExportHandle -> Key -> ExportLocation -> Bool -> CommandCleanup
cleanupExport r db ek loc sent = do cleanupExport r db ek loc sent = do
liftIO $ addExportedLocation db (asKey ek) loc liftIO $ addExportedLocation db ek loc
when sent $ when sent $
logChange (asKey ek) (uuid r) InfoPresent logChange ek (uuid r) InfoPresent
return True return True
startUnexport :: Remote -> ExportHandle -> TopFilePath -> [Git.Sha] -> CommandStart startUnexport :: Remote -> ExportHandle -> TopFilePath -> [Git.Sha] -> CommandStart
@ -335,7 +335,7 @@ startUnexport r db f shas = do
ai = ActionItemOther (Just (fromRawFilePath f')) ai = ActionItemOther (Just (fromRawFilePath f'))
si = SeekInput [] si = SeekInput []
startUnexport' :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart startUnexport' :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
startUnexport' r db f ek = startUnexport' r db f ek =
starting ("unexport " ++ name r) ai si $ starting ("unexport " ++ name r) ai si $
performUnexport r db [ek] loc performUnexport r db [ek] loc
@ -350,20 +350,20 @@ startUnexport' r db f ek =
-- remote is untrusted, so would not count as a copy anyway. -- remote is untrusted, so would not count as a copy anyway.
-- Or, an export may be appendonly, and removing a file from it does -- Or, an export may be appendonly, and removing a file from it does
-- not really remove the content, which must be accessible later on. -- not really remove the content, which must be accessible later on.
performUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandPerform performUnexport :: Remote -> ExportHandle -> [Key] -> ExportLocation -> CommandPerform
performUnexport r db eks loc = do performUnexport r db eks loc = do
ifM (allM rm eks) ifM (allM rm eks)
( next $ cleanupUnexport r db eks loc ( next $ cleanupUnexport r db eks loc
, stop , stop
) )
where where
rm ek = Remote.action $ removeExport (exportActions r) (asKey ek) loc rm ek = Remote.action $ removeExport (exportActions r) ek loc
cleanupUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandCleanup cleanupUnexport :: Remote -> ExportHandle -> [Key] -> ExportLocation -> CommandCleanup
cleanupUnexport r db eks loc = do cleanupUnexport r db eks loc = do
liftIO $ do liftIO $ do
forM_ eks $ \ek -> forM_ eks $ \ek ->
removeExportedLocation db (asKey ek) loc removeExportedLocation db ek loc
flushDbQueue db flushDbQueue db
-- An versionedExport remote supports removeExportLocation to remove -- An versionedExport remote supports removeExportLocation to remove
@ -371,12 +371,12 @@ cleanupUnexport r db eks loc = do
-- and allows retrieving it. -- and allows retrieving it.
unless (versionedExport (exportActions r)) $ do unless (versionedExport (exportActions r)) $ do
remaininglocs <- liftIO $ remaininglocs <- liftIO $
concat <$> forM eks (\ek -> getExportedLocation db (asKey ek)) concat <$> forM eks (getExportedLocation db)
when (null remaininglocs) $ when (null remaininglocs) $
forM_ eks $ \ek -> forM_ eks $ \ek ->
logChange (asKey ek) (uuid r) InfoMissing logChange ek (uuid r) InfoMissing
removeEmptyDirectories r db loc (map asKey eks) removeEmptyDirectories r db loc eks
startRecoverIncomplete :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart startRecoverIncomplete :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart
startRecoverIncomplete r db sha oldf startRecoverIncomplete r db sha oldf
@ -387,12 +387,12 @@ startRecoverIncomplete r db sha oldf
let ai = ActionItemOther (Just (fromRawFilePath (fromExportLocation loc))) let ai = ActionItemOther (Just (fromRawFilePath (fromExportLocation loc)))
let si = SeekInput [] let si = SeekInput []
starting ("unexport " ++ name r) ai si $ do starting ("unexport " ++ name r) ai si $ do
liftIO $ removeExportedLocation db (asKey ek) oldloc liftIO $ removeExportedLocation db ek oldloc
performUnexport r db [ek] loc performUnexport r db [ek] loc
where where
oldloc = mkExportLocation $ getTopFilePath oldf oldloc = mkExportLocation $ getTopFilePath oldf
startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
startMoveToTempName r db f ek = startMoveToTempName r db f ek =
starting ("rename " ++ name r) ai si $ starting ("rename " ++ name r) ai si $
performRename r db ek loc tmploc performRename r db ek loc tmploc
@ -403,11 +403,11 @@ startMoveToTempName r db f ek =
ai = ActionItemOther $ Just $ fromRawFilePath f' ++ " -> " ++ fromRawFilePath (fromExportLocation tmploc) ai = ActionItemOther $ Just $ fromRawFilePath f' ++ " -> " ++ fromRawFilePath (fromExportLocation tmploc)
si = SeekInput [] si = SeekInput []
startMoveFromTempName :: Remote -> ExportHandle -> ExportKey -> TopFilePath -> CommandStart startMoveFromTempName :: Remote -> ExportHandle -> Key -> TopFilePath -> CommandStart
startMoveFromTempName r db ek f = do startMoveFromTempName r db ek f = do
let tmploc = exportTempName ek let tmploc = exportTempName ek
let ai = ActionItemOther (Just (fromRawFilePath (fromExportLocation tmploc) ++ " -> " ++ fromRawFilePath f')) let ai = ActionItemOther (Just (fromRawFilePath (fromExportLocation tmploc) ++ " -> " ++ fromRawFilePath f'))
stopUnless (liftIO $ elem tmploc <$> getExportedLocation db (asKey ek)) $ stopUnless (liftIO $ elem tmploc <$> getExportedLocation db ek) $
starting ("rename " ++ name r) ai si $ starting ("rename " ++ name r) ai si $
performRename r db ek tmploc loc performRename r db ek tmploc loc
where where
@ -415,9 +415,9 @@ startMoveFromTempName r db ek f = do
f' = getTopFilePath f f' = getTopFilePath f
si = SeekInput [] si = SeekInput []
performRename :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandPerform performRename :: Remote -> ExportHandle -> Key -> ExportLocation -> ExportLocation -> CommandPerform
performRename r db ek src dest = performRename r db ek src dest =
tryNonAsync (renameExport (exportActions r) (asKey ek) src dest) >>= \case tryNonAsync (renameExport (exportActions r) ek src dest) >>= \case
Right (Just ()) -> next $ cleanupRename r db ek src dest Right (Just ()) -> next $ cleanupRename r db ek src dest
Left err -> do Left err -> do
warning $ "rename failed (" ++ show err ++ "); deleting instead" warning $ "rename failed (" ++ show err ++ "); deleting instead"
@ -427,14 +427,14 @@ performRename r db ek src dest =
where where
fallbackdelete = performUnexport r db [ek] src fallbackdelete = performUnexport r db [ek] src
cleanupRename :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandCleanup cleanupRename :: Remote -> ExportHandle -> Key -> ExportLocation -> ExportLocation -> CommandCleanup
cleanupRename r db ek src dest = do cleanupRename r db ek src dest = do
liftIO $ do liftIO $ do
removeExportedLocation db (asKey ek) src removeExportedLocation db ek src
addExportedLocation db (asKey ek) dest addExportedLocation db ek dest
flushDbQueue db flushDbQueue db
if exportDirectories src /= exportDirectories dest if exportDirectories src /= exportDirectories dest
then removeEmptyDirectories r db src [asKey ek] then removeEmptyDirectories r db src [ek]
else return True else return True
-- | Remove empty directories from the export. Call after removing an -- | Remove empty directories from the export. Call after removing an

View file

@ -200,9 +200,9 @@ removeExportTree h k loc = queueDb h $
-- and updates state. -- and updates state.
type ExportDiffUpdater type ExportDiffUpdater
= ExportHandle = ExportHandle
-> Maybe ExportKey -> Maybe Key
-- ^ old exported key -- ^ old exported key
-> Maybe ExportKey -> Maybe Key
-- ^ new exported key -- ^ new exported key
-> Git.DiffTree.DiffTreeItem -> Git.DiffTree.DiffTreeItem
-> Annex () -> Annex ()
@ -214,10 +214,10 @@ mkExportDiffUpdater
mkExportDiffUpdater removeold addnew h srcek dstek i = do mkExportDiffUpdater removeold addnew h srcek dstek i = do
case srcek of case srcek of
Nothing -> return () Nothing -> return ()
Just k -> liftIO $ removeold h (asKey k) loc Just k -> liftIO $ removeold h k loc
case dstek of case dstek of
Nothing -> return () Nothing -> return ()
Just k -> liftIO $ addnew h (asKey k) loc Just k -> liftIO $ addnew h k loc
where where
loc = mkExportLocation $ getTopFilePath $ Git.DiffTree.file i loc = mkExportLocation $ getTopFilePath $ Git.DiffTree.file i

20
Key.hs
View file

@ -23,8 +23,6 @@ module Key (
nonChunkKey, nonChunkKey,
chunkKeyOffset, chunkKeyOffset,
isChunkKey, isChunkKey,
gitShaKey,
keyGitSha,
isKeyPrefix, isKeyPrefix,
splitKeyNameExtension, splitKeyNameExtension,
@ -37,7 +35,6 @@ import qualified Data.Attoparsec.ByteString as A
import Common import Common
import Types.Key import Types.Key
import Git.Types
import Utility.QuickCheck import Utility.QuickCheck
import Utility.Bloom import Utility.Bloom
import Utility.Aeson import Utility.Aeson
@ -61,23 +58,6 @@ chunkKeyOffset k = (*)
isChunkKey :: Key -> Bool isChunkKey :: Key -> Bool
isChunkKey k = isJust (fromKey keyChunkSize k) && isJust (fromKey keyChunkNum k) isChunkKey k = isJust (fromKey keyChunkSize k) && isJust (fromKey keyChunkNum k)
-- Encodes a git sha as a key.
--
-- This is not the same as a SHA1 key, because the mapping needs to be
-- bijective, also because git may not always use SHA1.
gitShaKey :: Sha -> Key
gitShaKey (Ref s) = mkKey $ \kd -> kd
{ keyName = s
, keyVariety = OtherKey "GIT"
}
-- Reverse of gitShaKey
keyGitSha :: Key -> Maybe Sha
keyGitSha k
| fromKey keyVariety k == OtherKey "GIT" =
Just (Ref (fromKey keyName k))
| otherwise = Nothing
serializeKey :: Key -> String serializeKey :: Key -> String
serializeKey = decodeBS' . serializeKey' serializeKey = decodeBS' . serializeKey'

View file

@ -76,6 +76,10 @@ content of an annexed file remains unchanged.
passing it to a shell script. The URL-backend key is distinct from URLs/URIs passing it to a shell script. The URL-backend key is distinct from URLs/URIs
that may be attached to a key (from any backend) indicating the key's location that may be attached to a key (from any backend) indicating the key's location
on the web or in one of [[special_remotes]]. on the web or in one of [[special_remotes]].
* `GIT` -- This is used internally by git-annex when exporting trees
containing files stored in git, rather than git-annex. It represents a
git sha. This is never used for git-annex links, but information about
keys of this type is stored in the git-annex branch.
## external backends ## external backends