Merge branch 'exportreeplus'

This commit is contained in:
Joey Hess 2024-08-08 15:31:57 -04:00
commit 2616056cde
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
40 changed files with 705 additions and 222 deletions

View file

@ -44,6 +44,7 @@ import Annex.Transfer
import Annex.CheckIgnore
import Annex.CatFile
import Annex.VectorClock
import Annex.SpecialRemote.Config
import Command
import Backend
import Types.Key
@ -194,7 +195,7 @@ recordImportTree remote importtreeconfig imported = do
_ -> noop
-- When the remote is versioned, it still contains keys
-- that are not present in the new tree.
unless (Remote.versionedExport (Remote.exportActions remote)) $ do
unless (isVersioning (Remote.config remote)) $ do
db <- Export.openDb (Remote.uuid remote)
forM_ (exportedTreeishes oldexport) $ \oldtree ->
Export.runExportDiffUpdater updater db oldtree finaltree

View file

@ -27,6 +27,8 @@ module Annex.Locations (
gitAnnexInodeSentinalCache,
annexLocationsBare,
annexLocationsNonBare,
annexLocation,
exportAnnexObjectLocation,
gitAnnexDir,
gitAnnexObjectDir,
gitAnnexTmpOtherDir,
@ -121,6 +123,7 @@ import Types.UUID
import Types.GitConfig
import Types.Difference
import Types.BranchState
import Types.Export
import qualified Git
import qualified Git.Types as Git
import Git.FilePath
@ -169,6 +172,13 @@ annexLocationsBare config key =
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath
annexLocation config key hasher = objectDir P.</> keyPath key (hasher $ objectHashLevels config)
{- For exportree remotes with annexobjects=true, objects are stored
- in this location as well as in the exported tree. -}
exportAnnexObjectLocation :: GitConfig -> Key -> ExportLocation
exportAnnexObjectLocation gc k =
mkExportLocation $
".git" P.</> annexLocation gc k hashDirLower
{- Number of subdirectories from the gitAnnexObjectDir
- to the gitAnnexLocation. -}
gitAnnexLocationDepth :: GitConfig -> Int

View file

@ -19,12 +19,15 @@ import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
import Annex.Concurrent
import Annex.Tmp
import Annex.Verify
import Annex.UUID
import Logs.Proxy
import Logs.Cluster
import Logs.UUID
import Logs.Location
import Utility.Tmp.Dir
import Utility.Metered
import Git.Types
import qualified Database.Export as Export
import Control.Concurrent.STM
import Control.Concurrent.Async
@ -63,8 +66,12 @@ proxySpecialRemoteSide clientmaxversion r = mkRemoteSide r $ do
owaitv <- liftIO newEmptyTMVarIO
iclosedv <- liftIO newEmptyTMVarIO
oclosedv <- liftIO newEmptyTMVarIO
exportdb <- ifM (Remote.isExportSupported r)
( Just <$> Export.openDb (Remote.uuid r)
, pure Nothing
)
worker <- liftIO . async =<< forkState
(proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv)
(proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv exportdb)
let remoteconn = P2PConnection
{ connRepo = Nothing
, connCheckAuth = const False
@ -75,6 +82,7 @@ proxySpecialRemoteSide clientmaxversion r = mkRemoteSide r $ do
let closeremoteconn = do
liftIO $ atomically $ putTMVar oclosedv ()
join $ liftIO (wait worker)
maybe noop Export.closeDb exportdb
return $ Just
( remoterunst
, remoteconn
@ -89,8 +97,9 @@ proxySpecialRemote
-> TMVar (Either L.ByteString Message)
-> TMVar ()
-> TMVar ()
-> Maybe Export.ExportHandle
-> Annex ()
proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go
where
go :: Annex ()
go = liftIO receivemessage >>= \case
@ -167,7 +176,7 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
proxyput af k = do
liftIO $ sendmessage $ PUT_FROM (Offset 0)
withproxytmpfile k $ \tmpfile -> do
let store = tryNonAsync (Remote.storeKey r k af (Just (decodeBS tmpfile)) nullMeterUpdate) >>= \case
let store = tryNonAsync (storeput k af (decodeBS tmpfile)) >>= \case
Right () -> liftIO $ sendmessage SUCCESS
Left err -> liftIO $ propagateerror err
liftIO receivemessage >>= \case
@ -191,6 +200,25 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
_ -> giveup "protocol error"
liftIO $ removeWhenExistsWith removeFile (fromRawFilePath tmpfile)
storeput k af tmpfile = case mexportdb of
Just exportdb -> liftIO (Export.getExportTree exportdb k) >>= \case
[] -> storeputkey k af tmpfile
locs -> do
havelocs <- liftIO $ S.fromList
<$> Export.getExportedLocation exportdb k
let locs' = filter (`S.notMember` havelocs) locs
forM_ locs' $ \loc ->
storeputexport exportdb k loc tmpfile
liftIO $ Export.flushDbQueue exportdb
Nothing -> storeputkey k af tmpfile
storeputkey k af tmpfile =
Remote.storeKey r k af (Just tmpfile) nullMeterUpdate
storeputexport exportdb k loc tmpfile = do
Remote.storeExport (Remote.exportActions r) tmpfile k loc nullMeterUpdate
liftIO $ Export.addExportedLocation exportdb k loc
receivetofile iv h n = liftIO receivebytestring >>= \case
Just b -> do
liftIO $ atomically $
@ -248,9 +276,9 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
{- Check if this repository can proxy for a specified remote uuid,
- and if so enable proxying for it. -}
checkCanProxy :: UUID -> UUID -> Annex Bool
checkCanProxy remoteuuid ouruuid = do
ourproxies <- M.lookup ouruuid <$> getProxies
checkCanProxy' ourproxies remoteuuid >>= \case
checkCanProxy remoteuuid myuuid = do
myproxies <- M.lookup myuuid <$> getProxies
checkCanProxy' myproxies remoteuuid >>= \case
Right v -> do
Annex.changeState $ \st -> st { Annex.proxyremote = Just v }
return True
@ -266,32 +294,12 @@ checkCanProxy' (Just proxies) remoteuuid =
Just cu -> proxyforcluster cu
Nothing -> proxyfor ps
where
-- This repository may have multiple remotes that access the same
-- repository. Proxy for the lowest cost one that is configured to
-- be used as a proxy.
proxyfor ps = do
rs <- concat . Remote.byCost <$> Remote.remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
let sameuuid r = Remote.uuid r == remoteuuid
let samename r p = Remote.name r == proxyRemoteName p
case headMaybe (filter (\r -> sameuuid r && proxyisconfigured rs myclusters r && any (samename r) ps) rs) of
case canProxyForRemote rs ps myclusters remoteuuid of
Nothing -> notconfigured
Just r -> return (Right (Right r))
-- Only proxy for a remote when the git configuration
-- allows it. This is important to prevent changes to
-- the git-annex branch causing unexpected proxying for remotes.
proxyisconfigured rs myclusters r
| remoteAnnexProxy (Remote.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . Remote.gitconfig) $
filter (\p -> Remote.uuid p == proxyuuid) rs
Nothing -> False
proxyforcluster cu = do
clusters <- getClusters
@ -304,6 +312,57 @@ checkCanProxy' (Just proxies) remoteuuid =
"not configured to proxy for repository " ++ fromUUIDDesc desc
Nothing -> return $ Left Nothing
{- Remotes that this repository is configured to proxy for.
-
- When there are multiple remotes that access the same repository,
- this picks the lowest cost one that is configured to be used as a proxy.
-}
proxyForRemotes :: Annex [Remote]
proxyForRemotes = do
myuuid <- getUUID
(M.lookup myuuid <$> getProxies) >>= \case
Nothing -> return []
Just myproxies -> do
let myproxies' = S.toList myproxies
rs <- concat . Remote.byCost <$> Remote.remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
return $ mapMaybe (canProxyForRemote rs myproxies' myclusters . Remote.uuid) rs
-- Only proxy for a remote when the git configuration allows it.
-- This is important to prevent changes to the git-annex branch
-- causing unexpected proxying for remotes.
canProxyForRemote
:: [Remote] -- ^ must be sorted by cost
-> [Proxy]
-> M.Map RemoteName ClusterUUID
-> UUID
-> (Maybe Remote)
canProxyForRemote rs myproxies myclusters remoteuuid =
headMaybe $ filter canproxy rs
where
canproxy r =
sameuuid r &&
proxyisconfigured r &&
any (isproxyfor r) myproxies
sameuuid r = Remote.uuid r == remoteuuid
isproxyfor r p =
proxyRemoteUUID p == remoteuuid &&
Remote.name r == proxyRemoteName p
proxyisconfigured r
| remoteAnnexProxy (Remote.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . Remote.gitconfig) $
filter (\p -> Remote.uuid p == proxyuuid) rs
Nothing -> False
mkProxyMethods :: ProxyMethods
mkProxyMethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing

View file

@ -93,6 +93,9 @@ exportTreeField = Accepted "exporttree"
importTreeField :: RemoteConfigField
importTreeField = Accepted "importtree"
versioningField :: RemoteConfigField
versioningField = Accepted "versioning"
exportTree :: ParsedRemoteConfig -> Bool
exportTree = fromMaybe False . getRemoteConfigValue exportTreeField
@ -100,6 +103,15 @@ exportTree = fromMaybe False . getRemoteConfigValue exportTreeField
importTree :: ParsedRemoteConfig -> Bool
importTree = fromMaybe False . getRemoteConfigValue importTreeField
isVersioning :: ParsedRemoteConfig -> Bool
isVersioning = fromMaybe False . getRemoteConfigValue versioningField
annexObjectsField :: RemoteConfigField
annexObjectsField = Accepted "annexobjects"
annexObjects :: ParsedRemoteConfig -> Bool
annexObjects = fromMaybe False . getRemoteConfigValue annexObjectsField
{- Parsers for fields that are common to all special remotes. -}
commonFieldParsers :: [RemoteConfigFieldParser]
commonFieldParsers =
@ -124,6 +136,8 @@ essentialFieldParsers =
(FieldDesc "export trees of files to this remote")
, yesNoParser importTreeField (Just False)
(FieldDesc "import trees of files from this remote")
, yesNoParser annexObjectsField (Just False)
(FieldDesc "store other objects in remote along with exported trees")
]
autoEnableFieldParser :: RemoteConfigFieldParser

View file

@ -1,3 +1,20 @@
git-annex (10.20240831) UNRELEASED; urgency=medium
* export: Added --from option.
* Special remotes configured with exporttree=yes annexobjects=yes
can store objects in .git/annex/objects, as well as an exported tree.
* Support proxying to special remotes configured with
exporttree=yes annexobjects=yes.
* post-retrieve: When proxying is enabled for an exporttree=yes
special remote and the configured remote.name.annex-tracking-branch
is received, the tree is exported to the special remote.
* updateproxy, updatecluster: Prevent using an exporttree=yes special
remote that does not have annexobjects=yes, since it will not work.
* git-remote-annex: Store objects in exportree=yes special remotes
in the same paths used by annexobjects=yes.
-- Joey Hess <id@joeyh.name> Wed, 31 Jul 2024 15:52:03 -0400
git-annex (10.20240808) upstream; urgency=medium
* Remove debug output (to stderr) accidentially included in

View file

@ -1010,14 +1010,13 @@ getKeyExportLocations rmt k = do
-- inside the .git/annex/objects/ directory in the remote.
--
-- The first ExportLocation in the returned list is the one that
-- is the same as the local repository would use. But it's possible
-- that one of the others in the list was used by another repository to
-- upload a git key.
-- should be used to store a key. But it's possible
-- that one of the others in the list was used.
keyExportLocations :: Remote -> Key -> GitConfig -> UUID -> Maybe [ExportLocation]
keyExportLocations rmt k cfg uuid
| exportTree (Remote.config rmt) || importTree (Remote.config rmt) =
Just $ map (\p -> mkExportLocation (".git" P.</> p)) $
concatMap (`annexLocationsNonBare` k) cfgs
concatMap (`annexLocationsBare` k) cfgs
| otherwise = Nothing
where
-- When git-annex has not been initialized yet (eg, when cloning),

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2017-2023 Joey Hess <id@joeyh.name>
- Copyright 2017-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -21,6 +21,7 @@ import Git.Types
import Git.FilePath
import Git.Sha
import qualified Remote
import qualified Types.Remote as Remote
import Types.Remote
import Types.Export
import Annex.Export
@ -29,6 +30,7 @@ import Annex.Transfer
import Annex.CatFile
import Annex.FileMatcher
import Annex.RemoteTrackingBranch
import Annex.SpecialRemote.Config
import Logs.Location
import Logs.Export
import Logs.PreferredContent
@ -41,6 +43,7 @@ import Utility.Matcher
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import qualified Data.Set as S
import Control.Concurrent
cmd :: Command
@ -53,6 +56,7 @@ data ExportOptions = ExportOptions
{ exportTreeish :: Git.Ref
-- ^ can be a tree, a branch, a commit, or a tag
, exportRemote :: DeferredParse Remote
, sourceRemote :: [DeferredParse Remote]
, exportTracking :: Bool
}
@ -60,6 +64,7 @@ optParser :: CmdParamsDesc -> Parser ExportOptions
optParser _ = ExportOptions
<$> (Git.Ref <$> parsetreeish)
<*> (mkParseRemoteOption <$> parseToOption)
<*> many (mkParseRemoteOption <$> parseFromOption)
<*> parsetracking
where
parsetreeish = argument str
@ -82,6 +87,9 @@ seek o = startConcurrency commandStages $ do
unlessM (isExportSupported r) $
giveup "That remote does not support exports."
srcrs <- concat . Remote.byCost
<$> mapM getParsed (sourceRemote o)
-- handle deprecated option
when (exportTracking o) $
setConfig (remoteAnnexConfig r "tracking-branch")
@ -92,12 +100,15 @@ seek o = startConcurrency commandStages $ do
inRepo (Git.Ref.tree (exportTreeish o))
mtbcommitsha <- getExportCommit r (exportTreeish o)
seekExport r tree mtbcommitsha srcrs
seekExport :: Remote -> ExportFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> [Remote] -> CommandSeek
seekExport r tree mtbcommitsha srcrs = do
db <- openDb (uuid r)
writeLockDbWhile db $ do
changeExport r db tree
unlessM (Annex.getRead Annex.fast) $ do
void $ fillExport r db tree mtbcommitsha
void $ fillExport r db tree mtbcommitsha srcrs
closeDb db
-- | When the treeish is a branch like master or refs/heads/master
@ -150,16 +161,15 @@ changeExport r db (ExportFiltered new) = do
[oldtreesha] -> do
diffmap <- mkDiffMap oldtreesha new db
let seekdiffmap a = mapM_ a (M.toList diffmap)
-- Rename old files to temp, or delete.
let deleteoldf = \ek oldf -> commandAction $
startUnexport' r db oldf ek
let disposeoldf = \ek oldf -> commandAction $
startDispose r db oldf ek
seekdiffmap $ \case
(ek, (oldf:oldfs, _newf:_)) -> do
commandAction $
startMoveToTempName r db oldf ek
forM_ oldfs (deleteoldf ek)
forM_ oldfs (disposeoldf ek)
(ek, (oldfs, [])) ->
forM_ oldfs (deleteoldf ek)
forM_ oldfs (disposeoldf ek)
(_ek, ([], _)) -> noop
waitForAllRunningCommandActions
-- Rename from temp to new files.
@ -237,8 +247,8 @@ newtype AllFilled = AllFilled { fromAllFilled :: Bool }
--
-- Once all exported files have reached the remote, updates the
-- remote tracking branch.
fillExport :: Remote -> ExportHandle -> ExportFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> Annex Bool
fillExport r db (ExportFiltered newtree) mtbcommitsha = do
fillExport :: Remote -> ExportHandle -> ExportFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> [Remote] -> Annex Bool
fillExport r db (ExportFiltered newtree) mtbcommitsha srcrs = do
(l, cleanup) <- inRepo $ Git.LsTree.lsTree
Git.LsTree.LsTreeRecursive
(Git.LsTree.LsTreeLong False)
@ -246,7 +256,7 @@ fillExport r db (ExportFiltered newtree) mtbcommitsha = do
cvar <- liftIO $ newMVar (FileUploaded False)
allfilledvar <- liftIO $ newMVar (AllFilled True)
commandActions $
map (startExport r db cvar allfilledvar) l
map (startExport r srcrs db cvar allfilledvar) l
void $ liftIO $ cleanup
waitForAllRunningCommandActions
@ -259,8 +269,8 @@ fillExport r db (ExportFiltered newtree) mtbcommitsha = do
liftIO $ fromFileUploaded <$> takeMVar cvar
startExport :: Remote -> ExportHandle -> MVar FileUploaded -> MVar AllFilled -> Git.LsTree.TreeItem -> CommandStart
startExport r db cvar allfilledvar ti = do
startExport :: Remote -> [Remote] -> ExportHandle -> MVar FileUploaded -> MVar AllFilled -> Git.LsTree.TreeItem -> CommandStart
startExport r srcrs db cvar allfilledvar ti = do
ek <- exportKey (Git.LsTree.sha ti)
stopUnless (notrecordedpresent ek) $
starting ("export " ++ name r) ai si $
@ -268,7 +278,7 @@ startExport r db cvar allfilledvar ti = do
( next $ cleanupExport r db ek loc False
, do
liftIO $ modifyMVar_ cvar (pure . const (FileUploaded True))
performExport r db ek af (Git.LsTree.sha ti) loc allfilledvar
performExport r srcrs db ek af (Git.LsTree.sha ti) loc allfilledvar
)
where
loc = mkExportLocation f
@ -291,26 +301,10 @@ startExport r db cvar allfilledvar ti = do
else notElem (uuid r) <$> loggedLocations ek
)
performExport :: Remote -> ExportHandle -> Key -> AssociatedFile -> Sha -> ExportLocation -> MVar AllFilled -> CommandPerform
performExport r db ek af contentsha loc allfilledvar = do
let storer = storeExport (exportActions r)
performExport :: Remote -> [Remote] -> ExportHandle -> Key -> AssociatedFile -> Sha -> ExportLocation -> MVar AllFilled -> CommandPerform
performExport r srcrs db ek af contentsha loc allfilledvar = do
sent <- tryNonAsync $ if not (isGitShaKey ek)
then ifM (inAnnex ek)
( notifyTransfer Upload af $
-- alwaysUpload because the same key
-- could be used for more than one export
-- location, and concurrently uploading
-- of the content should still be allowed.
alwaysUpload (uuid r) ek af Nothing stdRetry $ \pm -> do
let rollback = void $
performUnexport r db [ek] loc
sendAnnex ek Nothing rollback $ \f _sz ->
Remote.action $
storer f ek loc pm
, do
showNote "not available"
return False
)
then tryrenameannexobject $ sendannexobject
-- Sending a non-annexed file.
else withTmpFile "export" $ \tmp h -> do
b <- catObject contentsha
@ -327,6 +321,65 @@ performExport r db ek af contentsha loc allfilledvar = do
Left err -> do
failedsend
throwM err
where
storer = storeExport (exportActions r)
sendannexobject = ifM (inAnnex ek)
( sendlocalannexobject
, do
locs <- S.fromList <$> loggedLocations ek
case filter (\sr -> S.member (Remote.uuid sr) locs) srcrs of
[] -> do
showNote "not available"
return False
(srcr:_) -> getsendannexobject srcr
)
sendlocalannexobject = sendwith $ \p -> do
let rollback = void $
performUnexport r db [ek] loc
sendAnnex ek Nothing rollback $ \f _sz ->
Remote.action $
storer f ek loc p
sendwith a =
notifyTransfer Upload af $
-- alwaysUpload because the same key
-- could be used for more than one export
-- location, and concurrently uploading
-- of the content should still be allowed.
alwaysUpload (uuid r) ek af Nothing stdRetry a
-- Similar to Command.Move.fromToPerform, use a regular download
-- of a local copy, lock early, and drop the local copy after sending.
getsendannexobject srcr = do
showAction $ UnquotedString $ "from " ++ Remote.name srcr
ifM (notifyTransfer Download af $ download srcr ek af stdRetry)
( lockContentForRemoval ek (return False) $ \contentlock -> do
showAction $ UnquotedString $ "to " ++ Remote.name r
sendlocalannexobject
`finally` removeAnnex contentlock
, return False
)
tryrenameannexobject fallback
| annexObjects (Remote.config r) = do
case renameExport (exportActions r) of
Just renameaction -> do
locs <- loggedLocations ek
gc <- Annex.getGitConfig
let objloc = exportAnnexObjectLocation gc ek
if Remote.uuid r `elem` locs
then tryNonAsync (renameaction ek objloc loc) >>= \case
Right (Just ()) -> do
liftIO $ addExportedLocation db ek loc
liftIO $ flushDbQueue db
return True
Left _err -> fallback
Right Nothing -> fallback
else fallback
Nothing -> fallback
| otherwise = fallback
cleanupExport :: Remote -> ExportHandle -> Key -> ExportLocation -> Bool -> CommandCleanup
cleanupExport r db ek loc sent = do
@ -348,16 +401,6 @@ startUnexport r db f shas = do
ai = ActionItemTreeFile f'
si = SeekInput []
startUnexport' :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
startUnexport' r db f ek =
starting ("unexport " ++ name r) ai si $
performUnexport r db [ek] loc
where
loc = mkExportLocation f'
f' = getTopFilePath f
ai = ActionItemTreeFile f'
si = SeekInput []
-- Unlike a usual drop from a repository, this does not check that
-- numcopies is satisfied before removing the content. Typically an export
-- remote is untrusted, so would not count as a copy anyway.
@ -379,18 +422,43 @@ cleanupUnexport r db eks loc = do
removeExportedLocation db ek loc
flushDbQueue db
-- A versionedExport remote supports removeExportLocation to remove
-- A versioned remote supports removeExportLocation to remove
-- the file from the exported tree, but still retains the content
-- and allows retrieving it.
unless (versionedExport (exportActions r)) $ do
unless (isVersioning (Remote.config r)) $ do
remaininglocs <- liftIO $
concat <$> forM eks (getExportedLocation db)
when (null remaininglocs) $
forM_ eks $ \ek ->
logChange ek (uuid r) InfoMissing
-- When annexobject=true, a key that
-- was unexported may still be present
-- on the remote.
if annexObjects (Remote.config r)
then tryNonAsync (checkPresent r ek) >>= \case
Right False ->
logChange ek (uuid r) InfoMissing
_ -> noop
else logChange ek (uuid r) InfoMissing
removeEmptyDirectories r db loc eks
-- Dispose of an old exported file by either unexporting it, or by moving
-- it to the annexobjects location.
startDispose :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
startDispose r db f ek =
starting ("unexport " ++ name r) ai si $
if annexObjects (Remote.config r) && not (isGitShaKey ek)
then do
gc <- Annex.getGitConfig
performRename False r db ek loc
(exportAnnexObjectLocation gc ek)
else performUnexport r db [ek] loc
where
loc = mkExportLocation f'
f' = getTopFilePath f
ai = ActionItemTreeFile f'
si = SeekInput []
startRecoverIncomplete :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart
startRecoverIncomplete r db sha oldf
| sha `elem` nullShas = stop
@ -408,7 +476,7 @@ startRecoverIncomplete r db sha oldf
startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
startMoveToTempName r db f ek = case renameExport (exportActions r) of
Just _ -> starting ("rename " ++ name r) ai si $
performRename r db ek loc tmploc
performRename True r db ek loc tmploc
Nothing -> starting ("unexport " ++ name r) ai' si $
performUnexport r db [ek] loc
where
@ -424,7 +492,7 @@ startMoveFromTempName :: Remote -> ExportHandle -> Key -> TopFilePath -> Command
startMoveFromTempName r db ek f = case renameExport (exportActions r) of
Just _ -> stopUnless (liftIO $ elem tmploc <$> getExportedLocation db ek) $
starting ("rename " ++ name r) ai si $
performRename r db ek tmploc loc
performRename True r db ek tmploc loc
Nothing -> starting ("unexport " ++ name r) ai' si $
performUnexport r db [ek] tmploc
where
@ -436,12 +504,14 @@ startMoveFromTempName r db ek f = case renameExport (exportActions r) of
ai' = ActionItemTreeFile (fromExportLocation tmploc)
si = SeekInput []
performRename :: Remote -> ExportHandle -> Key -> ExportLocation -> ExportLocation -> CommandPerform
performRename r db ek src dest = case renameExport (exportActions r) of
performRename :: Bool -> Remote -> ExportHandle -> Key -> ExportLocation -> ExportLocation -> CommandPerform
performRename warnonfail r db ek src dest = case renameExport (exportActions r) of
Just renameaction -> tryNonAsync (renameaction ek src dest) >>= \case
Right (Just ()) -> next $ cleanupRename r db ek src dest
Left err -> do
warning $ UnquotedString $ "rename failed (" ++ show err ++ "); deleting instead"
when warnonfail $
warning $ UnquotedString $
"rename failed (" ++ show err ++ "); deleting instead"
fallbackdelete
Right Nothing -> fallbackdelete
-- remote does not support renaming

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2017 Joey Hess <id@joeyh.name>
- Copyright 2017-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -11,10 +11,23 @@ module Command.PostReceive where
import Command
import qualified Annex
import Git.Types
import Annex.UpdateInstead
import Annex.CurrentBranch
import Command.Sync (mergeLocal, prepMerge, mergeConfig, SyncOptions(..))
import Annex.Proxy
import Remote
import qualified Types.Remote as Remote
import Config
import Git.Types
import Git.Sha
import Git.FilePath
import qualified Git.Ref
import Command.Export (filterExport, getExportCommit, seekExport)
import Command.Sync (syncBranch)
import qualified Data.Set as S
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
-- This does not need to modify the git-annex branch to update the
-- work tree, but auto-initialization might change the git-annex branch.
@ -28,9 +41,62 @@ cmd = noCommit $
(withParams seek)
seek :: CmdParams -> CommandSeek
seek _ = whenM needUpdateInsteadEmulation $ do
seek _ = do
fixPostReceiveHookEnv
commandAction updateInsteadEmulation
whenM needUpdateInsteadEmulation $
commandAction updateInsteadEmulation
proxyExportTree
updateInsteadEmulation :: CommandStart
updateInsteadEmulation = do
prepMerge
let o = def { notOnlyAnnexOption = True }
mc <- mergeConfig False
mergeLocal mc o =<< getCurrentBranch
proxyExportTree :: CommandSeek
proxyExportTree = do
rbs <- catMaybes <$> (mapM canexport =<< proxyForRemotes)
unless (null rbs) $ do
pushedbranches <- liftIO $
S.fromList . map snd . parseHookInput
<$> B.hGetContents stdin
let waspushed (r, (b, d))
| S.member (Git.Ref.branchRef b) pushedbranches =
Just (r, b, d)
| S.member (Git.Ref.branchRef (syncBranch b)) pushedbranches =
Just (r, syncBranch b, d)
| otherwise = Nothing
case headMaybe (mapMaybe waspushed rbs) of
Just (r, b, Nothing) -> go r b
Just (r, b, Just d) -> go r $
Git.Ref.branchFileRef b (getTopFilePath d)
Nothing -> noop
where
canexport r = case remoteAnnexTrackingBranch (Remote.gitconfig r) of
Nothing -> return Nothing
Just branch ->
ifM (isExportSupported r)
( return (Just (r, splitRemoteAnnexTrackingBranchSubdir branch))
, return Nothing
)
go r b = inRepo (Git.Ref.tree b) >>= \case
Nothing -> return ()
Just t -> do
tree <- filterExport r t
mtbcommitsha <- getExportCommit r b
seekExport r tree mtbcommitsha [r]
parseHookInput :: B.ByteString -> [((Sha, Sha), Ref)]
parseHookInput = mapMaybe parse . B8.lines
where
parse l = case B8.words l of
(oldb:newb:refb:[]) -> do
old <- extractSha oldb
new <- extractSha newb
return ((old, new), Ref refb)
_ -> Nothing
{- When run by the post-receive hook, the cwd is the .git directory,
- and GIT_DIR=. It's not clear why git does this.
@ -50,9 +116,3 @@ fixPostReceiveHookEnv = do
}
_ -> noop
updateInsteadEmulation :: CommandStart
updateInsteadEmulation = do
prepMerge
let o = def { notOnlyAnnexOption = True }
mc <- mergeConfig False
mergeLocal mc o =<< getCurrentBranch

View file

@ -28,6 +28,7 @@ module Command.Sync (
parseUnrelatedHistoriesOption,
SyncOptions(..),
OperationMode(..),
syncBranch,
) where
import Command
@ -87,8 +88,6 @@ import Utility.Tuple
import Control.Concurrent.MVar
import qualified Data.Map as M
import qualified Data.ByteString as S
import Data.Char
cmd :: Command
cmd = withAnnexOptions [jobsOption, backendOption] $
@ -307,15 +306,15 @@ seek' o = startConcurrency transferStages $ do
-- repositories, in case that lets content
-- be dropped from other repositories.
exportedcontent <- withbranch $
seekExportContent (Just o)
(filter isExport contentremotes)
seekExportContent (Just o) contentremotes
-- Sync content with remotes, including
-- importing from import remotes (since
-- importing only downloads new files not
-- old files)
let shouldsynccontent r
| isExport r && not (isImport r) = False
| isExport r && not (isImport r)
&& not (exportHasAnnexObjects r) = False
| otherwise = True
syncedcontent <- withbranch $
seekSyncContent o
@ -944,7 +943,8 @@ syncFile o ebloom rs af k = do
wantput r
| pushOption o == False && operationMode o /= SatisfyMode = return False
| Remote.readonly r || remoteAnnexReadOnly (Remote.gitconfig r) = return False
| isExport r || isImport r = return False
| isImport r && not (isExport r) = return False
| isExport r && not (exportHasAnnexObjects r) = return False
| isThirdPartyPopulated r = return False
| otherwise = wantGetBy True (Just k) af (Remote.uuid r)
handleput lack inhere
@ -975,7 +975,13 @@ syncFile o ebloom rs af k = do
- Returns True if any file transfers were made.
-}
seekExportContent :: Maybe SyncOptions -> [Remote] -> CurrBranch -> Annex Bool
seekExportContent o rs (mcurrbranch, madj)
seekExportContent o rs currbranch =
seekExportContent' o (filter canexportcontent rs) currbranch
where
canexportcontent r = isExport r && not (isProxied r)
seekExportContent' :: Maybe SyncOptions -> [Remote] -> CurrBranch -> Annex Bool
seekExportContent' o rs (mcurrbranch, madj)
| null rs = return False
| otherwise = do
-- Propagate commits from the adjusted branch, so that
@ -1013,7 +1019,7 @@ seekExportContent o rs (mcurrbranch, madj)
| tree == currtree -> do
filteredtree <- Command.Export.filterExport r tree
Command.Export.changeExport r db filteredtree
Command.Export.fillExport r db filteredtree mtbcommitsha
Command.Export.fillExport r db filteredtree mtbcommitsha []
| otherwise -> cannotupdateexport r db Nothing False
(Nothing, _, _) -> cannotupdateexport r db (Just (Git.fromRef b ++ " does not exist")) True
(_, Nothing, _) -> cannotupdateexport r db (Just "no branch is currently checked out") True
@ -1056,7 +1062,7 @@ seekExportContent o rs (mcurrbranch, madj)
-- filling in any files that did not get transferred
-- to the existing exported tree.
let filteredtree = Command.Export.ExportFiltered tree
Command.Export.fillExport r db filteredtree mtbcommitsha
Command.Export.fillExport r db filteredtree mtbcommitsha []
fillexistingexport r _ _ _ = do
warnExportImportConflict r
return False
@ -1147,14 +1153,11 @@ isExport = exportTree . Remote.config
isImport :: Remote -> Bool
isImport = importTree . Remote.config
isProxied :: Remote -> Bool
isProxied = isJust . remoteAnnexProxiedBy . Remote.gitconfig
exportHasAnnexObjects :: Remote -> Bool
exportHasAnnexObjects = annexObjects . Remote.config
isThirdPartyPopulated :: Remote -> Bool
isThirdPartyPopulated = Remote.thirdPartyPopulated . Remote.remotetype
splitRemoteAnnexTrackingBranchSubdir :: Git.Ref -> (Git.Ref, Maybe TopFilePath)
splitRemoteAnnexTrackingBranchSubdir tb = (branch, subdir)
where
(b, p) = separate' (== (fromIntegral (ord ':'))) (Git.fromRef' tb)
branch = Git.Ref b
subdir = if S.null p
then Nothing
else Just (asTopFilePath p)

View file

@ -34,10 +34,17 @@ seek = withNothing $ do
start :: CommandStart
start = startingCustomOutput (ActionItemOther Nothing) $ do
rs <- R.remoteList
let getnode r = do
clusternames <- remoteAnnexClusterNode (R.gitconfig r)
return $ M.fromList $ zip clusternames (repeat (S.singleton r))
let myclusternodes = M.unionsWith S.union (mapMaybe getnode rs)
let getnode r = case remoteAnnexClusterNode (R.gitconfig r) of
Nothing -> return Nothing
Just [] -> return Nothing
Just clusternames ->
ifM (Command.UpdateProxy.checkCanProxy r "Cannot use this special remote as a cluster node.")
( return $ Just $ M.fromList $
zip clusternames (repeat (S.singleton r))
, return Nothing
)
myclusternodes <- M.unionsWith S.union . catMaybes
<$> mapM getnode rs
myclusters <- annexClusters <$> Annex.getGitConfig
recordedclusters <- getClusters
descs <- R.uuidDescriptions

View file

@ -14,6 +14,7 @@ import Logs.Cluster
import Annex.UUID
import qualified Remote as R
import qualified Types.Remote as R
import Annex.SpecialRemote.Config
import Utility.SafeOutput
import qualified Data.Map as M
@ -30,8 +31,8 @@ seek = withNothing (commandAction start)
start :: CommandStart
start = startingCustomOutput (ActionItemOther Nothing) $ do
rs <- R.remoteList
let remoteproxies = S.fromList $ map mkproxy $
filter (isproxy . R.gitconfig) rs
remoteproxies <- S.fromList . map mkproxy
<$> filterM isproxy rs
clusterproxies <- getClusterProxies remoteproxies
let proxies = S.union remoteproxies clusterproxies
u <- getUUID
@ -54,9 +55,33 @@ start = startingCustomOutput (ActionItemOther Nothing) $ do
"Stopped proxying for " ++ proxyRemoteName p
_ -> noop
isproxy c = remoteAnnexProxy c || not (null (remoteAnnexClusterNode c))
mkproxy r = Proxy (R.uuid r) (R.name r)
isproxy r
| remoteAnnexProxy (R.gitconfig r) || not (null (remoteAnnexClusterNode (R.gitconfig r))) =
checkCanProxy r "Cannot proxy to this special remote."
| otherwise = pure False
checkCanProxy :: Remote -> String -> Annex Bool
checkCanProxy r cannotmessage =
ifM (R.isExportSupported r)
( if annexObjects (R.config r)
then pure True
else do
warnannexobjects
pure False
, pure True
)
where
warnannexobjects = warning $ UnquotedString $ unwords
[ R.name r
, "is configured with exporttree=yes, but without"
, "annexobjects=yes."
, cannotmessage
, "Suggest you run: git-annex enableremote"
, R.name r
, "annexobjects=yes"
]
-- Automatically proxy nodes of any cluster this repository is configured
-- to serve as a gateway for. Also proxy other cluster nodes that are

View file

@ -26,8 +26,12 @@ import Types.Availability
import Types.GitConfig
import Types.RemoteConfig
import Git.Types
import Git.FilePath
import Annex.SpecialRemote.Config
import Data.Char
import qualified Data.ByteString as S
{- Looks up a setting in git config. This is not as efficient as using the
- GitConfig type. -}
getConfig :: ConfigKey -> ConfigValue -> Annex ConfigValue
@ -99,3 +103,12 @@ pidLockFile = ifM (annexPidLock <$> Annex.getGitConfig)
#else
pidLockFile = pure Nothing
#endif
splitRemoteAnnexTrackingBranchSubdir :: Git.Ref -> (Git.Ref, Maybe TopFilePath)
splitRemoteAnnexTrackingBranchSubdir tb = (branch, subdir)
where
(b, p) = separate' (== (fromIntegral (ord ':'))) (Git.fromRef' tb)
branch = Git.Ref b
subdir = if S.null p
then Nothing
else Just (asTopFilePath p)

View file

@ -91,7 +91,6 @@ gen r u rc gc rs = do
{ storeExport = storeExportM serial adir
, retrieveExport = retrieveExportM serial adir
, removeExport = removeExportM serial adir
, versionedExport = False
, checkPresentExport = checkPresentExportM serial adir
, removeExportDirectory = Just $ removeExportDirectoryM serial adir
, renameExport = Just $ renameExportM serial adir

View file

@ -107,7 +107,6 @@ gen r u rc gc rs = do
{ storeExport = storeExportM dir cow
, retrieveExport = retrieveExportM dir cow
, removeExport = removeExportM dir
, versionedExport = False
, checkPresentExport = checkPresentExportM dir
-- Not needed because removeExportLocation
-- auto-removes empty directories.

View file

@ -95,7 +95,6 @@ gen rt externalprogram r u rc gc rs
{ storeExport = storeExportM external
, retrieveExport = retrieveExportM external
, removeExport = removeExportM external
, versionedExport = False
, checkPresentExport = checkPresentExportM external
, removeExportDirectory = Just $ removeExportDirectoryM external
, renameExport = Just $ renameExportM external

View file

@ -84,6 +84,7 @@ remote = RemoteType
, configParser = mkRemoteConfigParser
[ optionalStringParser locationField
(FieldDesc "url of git remote to remember with special remote")
, yesNoParser versioningField (Just False) HiddenField
]
, setup = gitSetup
, exportSupported = exportUnsupported
@ -229,7 +230,9 @@ gen r u rc gc rs
, gitconfig = gc
, readonly = Git.repoIsHttp r && not (isP2PHttp' gc)
, appendonly = False
, untrustworthy = False
, untrustworthy = isJust (remoteAnnexProxiedBy gc)
&& (exportTree c || importTree c)
&& not (isVersioning c)
, availability = repoAvail r
, remotetype = remote
, mkUnavailable = unavailable r u rc gc rs

View file

@ -1,25 +1,28 @@
{- Helper to make remotes support export and import (or not).
-
- Copyright 2017-2019 Joey Hess <id@joeyh.name>
- Copyright 2017-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE FlexibleInstances, FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Remote.Helper.ExportImport where
import Annex.Common
import qualified Annex
import Types.Remote
import Types.Key
import Types.ProposedAccepted
import Annex.Verify
import Remote.Helper.Encryptable (encryptionIsEnabled)
import qualified Database.Export as Export
import qualified Database.ContentIdentifier as ContentIdentifier
import Annex.Export
import Annex.LockFile
import Annex.SpecialRemote.Config
import Annex.Verify
import Annex.Content
import Git.Types (fromRef)
import Logs.Export
import Logs.ContentIdentifier (recordContentIdentifier)
@ -39,7 +42,6 @@ instance HasExportUnsupported (ExportActions Annex) where
, retrieveExport = nope
, checkPresentExport = \_ _ -> return False
, removeExport = nope
, versionedExport = False
, removeExportDirectory = nope
, renameExport = Nothing
}
@ -123,15 +125,17 @@ adjustExportImport r rs = do
else importUnsupported
}
}
let annexobjects = isexport && annexObjects (config r)
if not isexport && not isimport
then return r'
else adjustExportImport' isexport isimport r' rs
else do
gc <- Annex.getGitConfig
adjustExportImport' isexport isimport annexobjects r' rs gc
adjustExportImport' :: Bool -> Bool -> Remote -> RemoteStateHandle -> Annex Remote
adjustExportImport' isexport isimport r rs = do
adjustExportImport' :: Bool -> Bool -> Bool -> Remote -> RemoteStateHandle -> GitConfig -> Annex Remote
adjustExportImport' isexport isimport annexobjects r rs gc = do
dbv <- prepdbv
ciddbv <- prepciddb
let versioned = versionedExport (exportActions r)
return $ r
{ exportActions = if isexport
then if isimport
@ -141,52 +145,55 @@ adjustExportImport' isexport isimport r rs = do
, importActions = if isimport
then importActions r
else importUnsupported
, storeKey = \k af p ->
-- Storing a key on an export could be implemented,
-- but it would perform unnecessary work
, storeKey = \k af o p ->
-- Storing a key to an export location could be
-- implemented, but it would perform unnecessary work
-- when another repository has already stored the
-- key, and the local repository does not know
-- about it. To avoid unnecessary costs, don't do it.
if thirdpartypopulated
then giveup "remote is not populated by git-annex"
else if isexport
then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
then if annexobjects
then storeannexobject k o p
else giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
else if isimport
then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it"
else storeKey r k af p
, removeKey = \k ->
-- Removing a key from an export would need to
-- change the tree in the export log to not include
else storeKey r k af o p
, removeKey = \proof k ->
-- Removing a key from an export location would need
-- to change the tree in the export log to not include
-- the file. Otherwise, conflicts when removing
-- files would not be dealt with correctly.
-- There does not seem to be a good use case for
-- removing a key from an export in any case.
-- removing a key from an exported tree in any case.
if thirdpartypopulated
then giveup "dropping content from this remote is not supported"
else if isexport
then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
then if annexobjects
then removeannexobject dbv k
else giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
else if isimport
then giveup "dropping content from this remote is not supported because it is configured with importtree=yes"
else removeKey r k
else removeKey r proof k
, lockContent = if versioned
then lockContent r
else Nothing
, retrieveKeyFile = \k af dest p vc ->
if isimport
if isimport || isexport
then supportversionedretrieve k af dest p vc $
retrieveKeyFileFromImport dbv ciddbv k af dest p
else if isexport
then supportversionedretrieve k af dest p vc $
retrieveKeyFileFromExport dbv k af dest p
else retrieveKeyFile r k af dest p vc
supportretrieveannexobject dbv k af dest p $
retrieveFromImportOrExport (tryexportlocs dbv k) ciddbv k af dest p
else retrieveKeyFile r k af dest p vc
, retrieveKeyFileCheap = if versioned
then retrieveKeyFileCheap r
else Nothing
, checkPresent = \k -> if versioned
then checkPresent r k
else if isimport
then anyM (checkPresentImport ciddbv k)
=<< getanyexportlocs dbv k
then checkpresentwith k $
anyM (checkPresentImport ciddbv k)
=<< getanyexportlocs dbv k
else if isexport
-- Check if any of the files a key
-- was exported to are present. This
@ -197,8 +204,9 @@ adjustExportImport' isexport isimport r rs = do
-- to it. Remotes that have such
-- problems are made untrusted,
-- so it's not worried about here.
then anyM (checkPresentExport (exportActions r) k)
=<< getanyexportlocs dbv k
then checkpresentwith k $
anyM (checkPresentExport (exportActions r) k)
=<< getanyexportlocs dbv k
else checkPresent r k
-- checkPresent from an export is more expensive
-- than otherwise, so not cheap. Also, this
@ -226,13 +234,21 @@ adjustExportImport' isexport isimport r rs = do
then do
ts <- map fromRef . exportedTreeishes
<$> getExport (uuid r)
return (is++[("exporttree", "yes"), ("exportedtree", unwords ts)])
return $ is ++ catMaybes
[ Just ("exporttree", "yes")
, Just ("exportedtree", unwords ts)
, if annexobjects
then Just ("annexobjects", "yes")
else Nothing
]
else return is
return $ if isimport && not thirdpartypopulated
then (is'++[("importtree", "yes")])
else is'
}
where
versioned = isVersioning (config r)
thirdpartypopulated = thirdPartyPopulated (remotetype r)
-- exportActions adjusted to use the equivalent import actions,
@ -313,7 +329,7 @@ adjustExportImport' isexport isimport r rs = do
, liftIO $ atomically (readTMVar dbv)
)
getexportinconflict (_, _, v) = v
isexportinconflict (_, _, v) = liftIO $ atomically $ readTVar v
updateexportdb db exportinconflict =
Export.updateExportTreeFromLog db >>= \case
@ -329,8 +345,8 @@ adjustExportImport' isexport isimport r rs = do
getexportlocs dbv k = do
db <- getexportdb dbv
liftIO $ Export.getExportTree db k >>= \case
[] -> ifM (atomically $ readTVar $ getexportinconflict dbv)
liftIO (Export.getExportTree db k) >>= \case
[] -> ifM (isexportinconflict dbv)
( giveup "unknown export location, likely due to the export conflict"
, return []
)
@ -349,12 +365,16 @@ adjustExportImport' isexport isimport r rs = do
db <- getciddb ciddbv
liftIO $ ContentIdentifier.getContentIdentifiers db rs k
retrieveFromImportOrExport getlocs ciddbv k af dest p
| isimport = retrieveFromImport getlocs ciddbv k af dest p
| otherwise = retrieveFromExport getlocs k af dest p
-- Keys can be retrieved using retrieveExport, but since that
-- retrieves from a path in the remote that another writer could
-- have replaced with content not of the requested key, the content
-- has to be strongly verified.
retrieveKeyFileFromExport dbv k _af dest p = ifM (isVerifiable k)
( tryexportlocs dbv k $ \loc ->
retrieveFromExport getlocs k _af dest p = ifM (isVerifiable k)
( getlocs $ \loc ->
retrieveExport (exportActions r) k loc dest p >>= return . \case
UnVerified -> MustVerify
IncompleteVerify iv -> MustFinishIncompleteVerify iv
@ -362,28 +382,87 @@ adjustExportImport' isexport isimport r rs = do
, giveup $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ " backend"
)
retrieveKeyFileFromImport dbv ciddbv k af dest p = do
retrieveFromImport getlocs ciddbv k af dest p = do
cids <- getkeycids ciddbv k
if not (null cids)
then tryexportlocs dbv k $ \loc ->
then getlocs $ \loc ->
snd <$> retrieveExportWithContentIdentifier (importActions r) loc cids dest (Left k) p
-- In case a content identifier is somehow missing,
-- try this instead.
else if isexport
then retrieveKeyFileFromExport dbv k af dest p
then retrieveFromExport getlocs k af dest p
else giveup "no content identifier is recorded, unable to retrieve"
-- versionedExport remotes have a key/value store, so can use
-- the usual retrieveKeyFile, rather than an import/export
-- variant. However, fall back to that if retrieveKeyFile fails.
supportversionedretrieve k af dest p vc a
| versionedExport (exportActions r) =
retrieveKeyFile r k af dest p vc
`catchNonAsync` const a
| otherwise = a
checkpresentwith k a = ifM a
( return True
, if annexobjects
then checkpresentannexobject k
else return False
)
checkPresentImport ciddbv k loc =
checkPresentExportWithContentIdentifier
(importActions r)
k loc
=<< getkeycids ciddbv k
annexobjectlocation k = exportAnnexObjectLocation gc k
checkpresentannexobject k =
checkPresentExport (exportActions r) k (annexobjectlocation k)
storeannexobject k o p = prepSendAnnex' k o >>= \case
Nothing -> giveup "content is not available"
Just (src, _, checkmodified) -> do
let loc = annexobjectlocation k
storeExport (exportActions r) src k loc p
checkmodified >>= \case
Nothing -> return ()
Just err -> do
removeExport (exportActions r) k loc
giveup err
removeannexobject dbv k =
getanyexportlocs dbv k >>= \case
[] -> ifM (isexportinconflict dbv)
( do
warnExportImportConflict r
giveup "Cannot remove content from the remote until the conflict has been resolved."
, removeExport (exportActions r) k (annexobjectlocation k)
)
_ -> giveup "This key is part of the exported tree, so can only be removed by exporting a tree that does not include it."
retrieveannexobject k af dest p =
retrieveFromExport getlocs k af dest p
where
getlocs a = a (annexobjectlocation k)
supportretrieveannexobject dbv k af dest p a
| annexobjects = tryNonAsync a >>= \case
Right res -> return res
Left err -> tryNonAsync (retrieveannexobject k af dest p) >>= \case
Right res -> return res
-- Both failed, so which exception to
-- throw? If there are known export
-- locations, throw the exception from
-- retrieving from the export locations.
-- If there are no known export locations,
-- throw the exception from retrieving from
-- the annexobjects location.
Left err' -> getanyexportlocs dbv k >>= \case
[] -> ifM (isexportinconflict dbv)
( throwM err
, throwM err'
)
_ -> throwM err
| otherwise = a
-- versioned remotes have a key/value store which
-- the usual retrieveKeyFile can be used with, rather than
-- an import/export variant. However, fall back to that
-- if retrieveKeyFile fails.
supportversionedretrieve k af dest p vc a
| versioned =
retrieveKeyFile r k af dest p vc
`catchNonAsync` const a
| otherwise = a

View file

@ -79,7 +79,6 @@ gen r u rc gc rs = do
{ storeExport = cannotModify
, retrieveExport = retriveExportHttpAlso url
, removeExport = cannotModify
, versionedExport = False
, checkPresentExport = checkPresentExportHttpAlso url
, removeExportDirectory = Nothing
, renameExport = cannotModify

View file

@ -103,7 +103,6 @@ gen r u rc gc rs = do
{ storeExport = storeExportM o
, retrieveExport = retrieveExportM o
, removeExport = removeExportM o
, versionedExport = False
, checkPresentExport = checkPresentExportM o
, removeExportDirectory = Just (removeExportDirectoryM o)
, renameExport = Just $ renameExportM o

View file

@ -143,10 +143,7 @@ storageclassField = Accepted "storageclass"
fileprefixField :: RemoteConfigField
fileprefixField = Accepted "fileprefix"
versioningField :: RemoteConfigField
versioningField = Accepted "versioning"
publicField :: RemoteConfigField
publicField = Accepted "public"
@ -224,7 +221,6 @@ gen r u rc gc rs = do
{ storeExport = storeExportS3 hdl this rs info magic
, retrieveExport = retrieveExportS3 hdl this info
, removeExport = removeExportS3 hdl this rs info
, versionedExport = versioning info
, checkPresentExport = checkPresentExportS3 hdl this info
-- S3 does not have directories.
, removeExportDirectory = Nothing

View file

@ -101,7 +101,6 @@ gen r u rc gc rs = do
, retrieveExport = retrieveExportDav hdl
, checkPresentExport = checkPresentExportDav hdl this
, removeExport = removeExportDav hdl
, versionedExport = False
, removeExportDirectory = Just $
removeExportDirectoryDav hdl
, renameExport = Just $ renameExportDav hdl

View file

@ -34,7 +34,7 @@ import Control.DeepSeq
-- PINNED in memory which caused memory fragmentation and excessive memory
-- use.
newtype ExportLocation = ExportLocation S.ShortByteString
deriving (Show, Eq, Generic)
deriving (Show, Eq, Generic, Ord)
instance NFData ExportLocation

View file

@ -278,11 +278,6 @@ data ExportActions a = ExportActions
-- Can throw exception if unable to access remote, or if remote
-- refuses to remove the content.
, removeExport :: Key -> ExportLocation -> a ()
-- Set when the remote is versioned, so once a Key is stored
-- to an ExportLocation, a subsequent deletion of that
-- ExportLocation leaves the key still accessible to retrieveKeyFile
-- and checkPresent.
, versionedExport :: Bool
-- Removes an exported directory. Typically the directory will be
-- empty, but it could possibly contain files or other directories,
-- and it's ok to delete those (but not required to).

View file

@ -189,8 +189,9 @@ the special remote can reply with `UNSUPPORTED-REQUEST`.
a list of settings with descriptions. Note that the user is not required
to provided all the settings listed here. A block of responses
can be made to this, which must always end with `CONFIGEND`.
(Do not include settings like "encryption" that are common to all external
special remotes.)
(Do not include config like "encryption" that are common to all external
special remotes. Also avoid including a config named "versioning"
unless using it as desribed in the [[export_and_import_appendix]].)
* `CONFIG Name Description`
Indicates the name and description of a config setting. The description
should be reasonably short. Example:

View file

@ -153,13 +153,6 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`.
Indicates that `IMPORTKEY` can be used.
* `IMPORTKEYSUPPORTED-FAILURE`
Indicates that `IMPORTKEY` cannot be used.
* `VERSIONED`
Used to check if the special remote is versioned.
Note that this request may be made before or after `PREPARE`.
* `ISVERSIONED`
Indicates that the remote is versioned.
* `NOTVERSIONED`
Indicates that the remote is not versioned.
* `LISTIMPORTABLECONTENTS`
Used to get a list of all the files that are stored in the special
remote. A block of responses
@ -178,10 +171,9 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`.
block of responses. This can be repeated any number of times
(indicating a branching history), and histories can also
be nested multiple levels deep.
This should only be used when the remote supports using
"TRANSFER RECEIVE Key" to retrieve historical versions of files.
And, it should only be used when the remote replies `ISVERSIONED`
to the `VERSIONED` message.
This should only be a response when the remote supports using
"TRANSFER RECEIVE Key" to retrieve historical versions of files,
and when "GETCONFIG versioning" yields "VALUE TRUE".
* `END`
Indicates the end of a block of responses.
* `LOCATION Name`

View file

@ -545,6 +545,10 @@ it pick which of multiple branches to export?
Perhaps configure the annex-tracking-branch in the git-annex branch?
That might be generally useful when working with exporttree=yes remotes.
Or simply configure remote.foo.annex-tracking-branch on the proxy.
This may not meet all use cases, but it's simple and seems like a
reasonable first step.
The first two approaches also have a complication when a key is sent to
the proxy that is not part of the configured annex-tracking-branch. What
does the proxy do with it? There seem three possibilities:
@ -610,19 +614,35 @@ were not accessible when it is accessed directly rather than via the proxy.
Simplified design for proxying to exporttree=yes, if those remotes can
store any key:
* Configure annex-tracking-branch for the proxy in the git-annex branch.
(For the proxy as a whole, or for specific exporttree=yes repos behind
it?)
* Configure annex-tracking-branch in the proxy's git config.
* Then the user's workflow is simply: `git-annex push`
* The proxy handles PUT/GET/REMOVE of a key that is not in the
annex-tracking branch that it currently knows about, by using
the special remote's .git/annex/objects/ location.
* Upon receiving a new annex-tracking-branch or any transfer of a key
used in the current annex-tracking-branch, the proxy can update
the exporttree=yes remote. This needs to happen incrementally,
eg upon receiving a key, just proxy it on to the exporttree=yes remote,
and update the export database. Once all keys are received, update
the git-annex branch to indicate a new tree has been exported.
* The proxy handles PUT by always storing to the special remote's
.git/annex/objects/ location, not updating the exported tree.
* The proxy allows REMOVE from the special remote's
.git/annex/objects/ location, but not removal of keys
that are in the currently exported tree.
* When `git-annex post-receive` is run by the post-receive hook
and the annex-tracking-branch has been updated, it exports
the tree to the special remote.
(But, `git-annex push` sends the updated tree first, so
this will often be an incomplete export.)
* When there is an incomplete export and a key is received
that is part of that export, check if it is the *last* key
that is needed to complete the export. If so, export the tree to the
special remote again.
(This avoids overhead and complication of incrementally updating
the export. It relies on the special remote supporting renameExport.
Incrementally updating the export might be worth doing eventually,
for special remotes that do no support renameExport.)
* When exporting a tree to the special remote, handle cases
where a single key is used by multiple files, and the key is not
present locally. In this case it currently fails to update
one of the files (and renames the annexobjects location to the other
one). It will need to download the content from the special remote and
send it back to it.
* When the special remote does not support renameExport, will need to
download from the annexobjects location in order to store to the export
location.
## possible enhancement: indirect uploads

View file

@ -14,8 +14,7 @@ Normally files are stored on a git-annex special remote named by their
keys. That is great for reliable data storage, but your filenames are
obscured. Exporting replicates the tree to the special remote as-is.
Mixing key/value storage and exports in the same remote would be a mess and
so is not allowed. You have to configure a special remote with
To use this, you have to configure a special remote with
`exporttree=yes` when initially setting it up with
[[git-annex-initremote]](1).
@ -78,6 +77,20 @@ so the overwritten modification is not lost.)
Specify the special remote to export to.
* `--from=remote`
When the content of a file is not available in the local repository,
this option lets it be downloaded from another remote, and sent on to the
destination remote. The file will be temporarily stored on local disk,
but will never enter the local repository.
This option can be repeated multiple times.
It is possible to use --from with the same remote as --to. If the tree
contains several files with the same content, and the remote being
exported to already contains one copy of the content, this allows making
a copy by downloading the content from it.
* `--tracking`
This is a deprecated way to set "remote.<name>.annex-tracking-branch".

View file

@ -17,6 +17,11 @@ for repositories that have an adjusted branch checked
out. The hook updates the work tree when run in such a repository,
the same as running `git-annex merge` would.
When a repository is configured to proxy to a special remote with
exporttree=yes, and the configured remote.name.annex-tracking-branch
is received, the hook handles updating the tree exported to the
special remote.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
@ -29,6 +34,8 @@ the same as running `git-annex merge` would.
[[git-annex-merge]](1)
[[git-annex-export]](1)
# AUTHOR
Joey Hess <id@joeyh.name>

View file

@ -92,8 +92,8 @@ See [[git-annex-preferred-content]](1).
This option can be repeated multiple times with different paths.
Note that this option is ignored when syncing with "exporttree=yes"
remotes.
Note that this option does not prevent exporting other files to an
"exporttree=yes" remote.
* `--all` `-A`

View file

@ -37,8 +37,8 @@ do so by using eg `approxlackingcopies=1`.
This option can be repeated multiple times with different paths.
Note that this option is ignored when syncing with "exporttree=yes"
remotes.
Note that this option does not prevent exporting other files to an
"exporttree=yes" remote.
* `--jobs=N` `-JN`

View file

@ -28,6 +28,14 @@ a proxy.
Proxies can only be accessed via ssh or by an annex+http url.
To set up proxying to a special remote that is configured with
exporttree=yes, its necessary for it to also be configured with
annexobjects=yes. And, "remote.<name>.annex-tracking-branch" needs to
be configured to the branch that will be exported to the special remote.
When that branch is pushed to the proxy, it will update the tree exported
to the special remote. When files are copied to the remote via the proxy,
it will also update the exported tree.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
@ -36,6 +44,7 @@ Proxies can only be accessed via ssh or by an annex+http url.
* [[git-annex]](1)
* [[git-annex-updatecluster]](1)
* [[git-annex-export]](1)
# AUTHOR

View file

@ -351,7 +351,6 @@ content from the key-value store.
See [[git-annex-extendcluster](1) for details.
* `updateproxy`
Update records with proxy configuration.

View file

@ -125,6 +125,11 @@ the S3 remote.
When versioning is not enabled, this risks data loss, and so git-annex
will not let you enable a remote with that configuration unless forced.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
remote.
* `publicurl` - Configure the URL that is used to download files
from the bucket. Using this with a S3 bucket that has been configured
to allow anyone to download its content allows git-annex to download

View file

@ -32,6 +32,11 @@ the adb remote.
by [[git-annex-import]]. When set in combination with exporttree,
this lets files be imported from it, and changes exported back to it.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
remote.
* `oldandroid` - Set to "yes" if your Android device is too old
to support `find -printf`. Enabling this will make importing slower.
If you see an error like "bad arg '-printf'", you can enable this

View file

@ -41,6 +41,11 @@ remote:
by [[git-annex-import]]. It will not be usable as a general-purpose
special remote.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
directory.
* `ignoreinodes` - Usually when importing, the inode numbers
of files are used to detect when files have changed. Since some
filesystems generate new inode numbers each time they are mounted,

View file

@ -32,6 +32,9 @@ for a list of known working combinations.
Setting this does not allow trees to be exported to the httpalso remote,
because it's read-only. But it does let exported files be downloaded
from it.
* `annexobjects` - If the other special remote has `annexobjects=yes`
set (along with `exporttree=yes`), it also needs to be set when
initializing the httpalso remote.
Configuration of encryption and chunking is inherited from the other
special remote, and does not need to be specified when initializing the

View file

@ -26,6 +26,11 @@ These parameters can be passed to `git annex initremote` to configure rsync:
by [[git-annex-export]]. It will not be usable as a general-purpose
special remote.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
remote.
* `shellescape` - Optional. This has no effect when using rsync 3.2.4 or
newer. Set to "no" to avoid shell escaping
normally done when using older versions of rsync over ssh. That escaping

View file

@ -33,6 +33,11 @@ the webdav remote.
by [[git-annex-export]]. It will not be usable as a general-purpose
special remote.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
remote.
* `chunk` - Enables [[chunking]] when storing large files.
* `chunksize` - Deprecated version of chunk parameter above.

View file

@ -16,9 +16,6 @@ keys, in order to support exporttree=yes remotes.
Another place this would be useful is
[[proxying to exporttree=yes special remotes|design/passthrough_proxy]].
This could also solve [[todo/export_paired_rename_innefficenctcy]]
cleanly.
With this change, a user could just `git-annex copy --to remote`
and copy whatever files they want into it. Then later
`git-annex export master --to remote` would efficiently update the tree
@ -52,6 +49,13 @@ surprising for an existing user!
Perhaps this should not be "exportree=yes", but something else.
> Currently, if a remote is configured with "exporttree=foo", that
> is treated the same as "exporttree=no". So this will need to be
> a config added to exporttree=yes in order to interoperate
> with old git-annex.
>
> Call it "exporttree=yes annexobjects=yes" --[[Joey]]
----
Consider two repositories A and B that both have access to the same
@ -60,16 +64,58 @@ exporttree=yes special remote R.
* A exports tree T1 to R
* B pulls from A, so knows R has tree T1
* A exports tree T2 to R, which deletes file `foo`. So
it is moved to R's .git/annex/objects/
it is moved to R's .git/annex/objects. Or, alternatively,
`foo` is deleted, and the key is then copied to R again,
also to .git/annex/objects.
* B exports tree T2 to R also. So B deletes file `foo`. But it was not
present anyway. If B then marks the key as not present in R, we will have
lost track of the fact that A moved it to the objects location.
So, when calling removeExport, have to also check if the key is present in
the objects location. If so, don't record the key as missing. (Or course,
it already checks if some other exported file also has the content of the
key.)
the objects location. If so, either don't record the key as missing, or
also remove from the objects location.
----
Could a remote with annexobjects=yet and exporttree=yes but without
importtree=yes not be forced to be untrusted?
If not, the retrieval from the annexobjects location needs to do strong
verification of the content.
If the annexobjects directory only gets keys uploaded to it, and never had
exported files renamed into it, its content will always be as expected, and
perhaps the remote does not need to be untrusted.
OTOH, if an exported file that is being deleted in an
updated export gets renamed into the annexobjects directory, it's possible
that the file has in fact been overwritten with other content (by git-annex
in another clone of the repository), and so the object in annexobjects
would not be as expected. So unfortunately, it seems that rename can't be
done without forcing untrusted.
Note that, exporting a new tree can still delete any file at any time.
If the remote is not untrusted, that could violate numcopies.
So, performUnexport would need to check numcopies first, when using such a
remote.
Even if they are not untrusted, an exported file can't be counted as a
copy. Only a file in the annexobjects location can be. So the remote's
checkPresent will perhaps need to return false for files that are exported?
But surely other things than numcopies use checkPresent. So this might need
a change to checkPresent's type to indicate the difference.
Crazy idea: Split the remote into two uuids. Use one for
the annexobjects directory, and the other for the exported files. This
clean separation avoids the above problem. But would be confusing for the
user. HOWEVER, what if the two were treated as parts of the same cluster....?
This may be worth revisiting later, but for now, I am leaning to keeping it
untrusted, and following down that line to make it as performant as
possible.
---
Implementing in the "exportreeplus" branch --[[Joey]]
> [[done]] --[[Joey]]

View file

@ -31,7 +31,30 @@ Planned schedule of work:
## work notes
* Working on `exportreeplus` branch which is groundwork for proxying to
exporttree=yes special remotes.
exporttree=yes special remotes. Need to merge it to master.
## completed items for August
* Special remotes configured with exporttree=yes annexobjects=yes
can store objects in .git/annex/objects, as well as an exported tree.
* Support proxying to special remotes configured with
exporttree=yes annexobjects=yes.
* post-retrieve: When proxying is enabled for an exporttree=yes
special remote and the configured remote.name.annex-tracking-branch
is received, the tree is exported to the special remote.
* When getting from a P2P HTTP remote, prompt for credentials when
required, instead of failing.
* Prevent `updateproxy` and `updatecluster` from adding
an exporttree=yes special remote that does not have
annexobjects=yes, to avoid foot shooting.
* Implement `git-annex export treeish --to=foo --from=bar`, which
gets from bar as needed to send to foo. Make post-retrieve use
`--to=r --from=r` to handle the multiple files case.
## items deferred until later for p2p protocol over http