Merge branch 'exportreeplus'

commit 2616056cde
Joey Hess 2024-08-08 15:31:57 -04:00
40 changed files with 705 additions and 222 deletions

View file

@@ -44,6 +44,7 @@ import Annex.Transfer
 import Annex.CheckIgnore
 import Annex.CatFile
 import Annex.VectorClock
+import Annex.SpecialRemote.Config
 import Command
 import Backend
 import Types.Key
@@ -194,7 +195,7 @@ recordImportTree remote importtreeconfig imported = do
 _ -> noop
 -- When the remote is versioned, it still contains keys
 -- that are not present in the new tree.
-unless (Remote.versionedExport (Remote.exportActions remote)) $ do
+unless (isVersioning (Remote.config remote)) $ do
 db <- Export.openDb (Remote.uuid remote)
 forM_ (exportedTreeishes oldexport) $ \oldtree ->
 Export.runExportDiffUpdater updater db oldtree finaltree

View file

@@ -27,6 +27,8 @@ module Annex.Locations (
 gitAnnexInodeSentinalCache,
 annexLocationsBare,
 annexLocationsNonBare,
+annexLocation,
+exportAnnexObjectLocation,
 gitAnnexDir,
 gitAnnexObjectDir,
 gitAnnexTmpOtherDir,
@@ -121,6 +123,7 @@ import Types.UUID
 import Types.GitConfig
 import Types.Difference
 import Types.BranchState
+import Types.Export
 import qualified Git
 import qualified Git.Types as Git
 import Git.FilePath
@@ -169,6 +172,13 @@ annexLocationsBare config key =
 annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath
 annexLocation config key hasher = objectDir P.</> keyPath key (hasher $ objectHashLevels config)
 
+{- For exportree remotes with annexobjects=true, objects are stored
+- in this location as well as in the exported tree. -}
+exportAnnexObjectLocation :: GitConfig -> Key -> ExportLocation
+exportAnnexObjectLocation gc k =
+mkExportLocation $
+".git" P.</> annexLocation gc k hashDirLower
+
 {- Number of subdirectories from the gitAnnexObjectDir
 - to the gitAnnexLocation. -}
 gitAnnexLocationDepth :: GitConfig -> Int
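Note on the new exportAnnexObjectLocation above: judging from its definition (annexLocation with hashDirLower, wrapped in mkExportLocation under ".git"), an annexobjects=yes special remote is expected to keep unexported annex objects at a path of roughly the form .git/annex/objects/<hashdir>/<key>/<key> inside the remote, alongside the files of the exported tree. The exact hash directory layout depends on the repository's configured hash levels; this path shape is an illustration, not something stated in the patch.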

View file

@@ -19,12 +19,15 @@ import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
 import Annex.Concurrent
 import Annex.Tmp
 import Annex.Verify
+import Annex.UUID
 import Logs.Proxy
 import Logs.Cluster
 import Logs.UUID
 import Logs.Location
 import Utility.Tmp.Dir
 import Utility.Metered
+import Git.Types
+import qualified Database.Export as Export
 
 import Control.Concurrent.STM
 import Control.Concurrent.Async
@@ -63,8 +66,12 @@ proxySpecialRemoteSide clientmaxversion r = mkRemoteSide r $ do
 owaitv <- liftIO newEmptyTMVarIO
 iclosedv <- liftIO newEmptyTMVarIO
 oclosedv <- liftIO newEmptyTMVarIO
+exportdb <- ifM (Remote.isExportSupported r)
+( Just <$> Export.openDb (Remote.uuid r)
+, pure Nothing
+)
 worker <- liftIO . async =<< forkState
-(proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv)
+(proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv exportdb)
 let remoteconn = P2PConnection
 { connRepo = Nothing
 , connCheckAuth = const False
@@ -75,6 +82,7 @@ proxySpecialRemoteSide clientmaxversion r = mkRemoteSide r $ do
 let closeremoteconn = do
 liftIO $ atomically $ putTMVar oclosedv ()
 join $ liftIO (wait worker)
+maybe noop Export.closeDb exportdb
 return $ Just
 ( remoterunst
 , remoteconn
@@ -89,8 +97,9 @@ proxySpecialRemote
 -> TMVar (Either L.ByteString Message)
 -> TMVar ()
 -> TMVar ()
+-> Maybe Export.ExportHandle
 -> Annex ()
-proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
+proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go
 where
 go :: Annex ()
 go = liftIO receivemessage >>= \case
@@ -167,7 +176,7 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
 proxyput af k = do
 liftIO $ sendmessage $ PUT_FROM (Offset 0)
 withproxytmpfile k $ \tmpfile -> do
-let store = tryNonAsync (Remote.storeKey r k af (Just (decodeBS tmpfile)) nullMeterUpdate) >>= \case
+let store = tryNonAsync (storeput k af (decodeBS tmpfile)) >>= \case
 Right () -> liftIO $ sendmessage SUCCESS
 Left err -> liftIO $ propagateerror err
 liftIO receivemessage >>= \case
@@ -191,6 +200,25 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
 _ -> giveup "protocol error"
 liftIO $ removeWhenExistsWith removeFile (fromRawFilePath tmpfile)
 
+storeput k af tmpfile = case mexportdb of
+Just exportdb -> liftIO (Export.getExportTree exportdb k) >>= \case
+[] -> storeputkey k af tmpfile
+locs -> do
+havelocs <- liftIO $ S.fromList
+<$> Export.getExportedLocation exportdb k
+let locs' = filter (`S.notMember` havelocs) locs
+forM_ locs' $ \loc ->
+storeputexport exportdb k loc tmpfile
+liftIO $ Export.flushDbQueue exportdb
+Nothing -> storeputkey k af tmpfile
+
+storeputkey k af tmpfile =
+Remote.storeKey r k af (Just tmpfile) nullMeterUpdate
+
+storeputexport exportdb k loc tmpfile = do
+Remote.storeExport (Remote.exportActions r) tmpfile k loc nullMeterUpdate
+liftIO $ Export.addExportedLocation exportdb k loc
+
 receivetofile iv h n = liftIO receivebytestring >>= \case
 Just b -> do
 liftIO $ atomically $
@@ -248,9 +276,9 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
 {- Check if this repository can proxy for a specified remote uuid,
 - and if so enable proxying for it. -}
 checkCanProxy :: UUID -> UUID -> Annex Bool
-checkCanProxy remoteuuid ouruuid = do
-ourproxies <- M.lookup ouruuid <$> getProxies
-checkCanProxy' ourproxies remoteuuid >>= \case
+checkCanProxy remoteuuid myuuid = do
+myproxies <- M.lookup myuuid <$> getProxies
+checkCanProxy' myproxies remoteuuid >>= \case
 Right v -> do
 Annex.changeState $ \st -> st { Annex.proxyremote = Just v }
 return True
@@ -266,32 +294,12 @@ checkCanProxy' (Just proxies) remoteuuid =
 Just cu -> proxyforcluster cu
 Nothing -> proxyfor ps
 where
--- This repository may have multiple remotes that access the same
--- repository. Proxy for the lowest cost one that is configured to
--- be used as a proxy.
 proxyfor ps = do
 rs <- concat . Remote.byCost <$> Remote.remoteList
 myclusters <- annexClusters <$> Annex.getGitConfig
-let sameuuid r = Remote.uuid r == remoteuuid
-let samename r p = Remote.name r == proxyRemoteName p
-case headMaybe (filter (\r -> sameuuid r && proxyisconfigured rs myclusters r && any (samename r) ps) rs) of
+case canProxyForRemote rs ps myclusters remoteuuid of
 Nothing -> notconfigured
 Just r -> return (Right (Right r))
--- Only proxy for a remote when the git configuration
--- allows it. This is important to prevent changes to
--- the git-annex branch causing unexpected proxying for remotes.
-proxyisconfigured rs myclusters r
-| remoteAnnexProxy (Remote.gitconfig r) = True
--- Proxy for remotes that are configured as cluster nodes.
-| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True
--- Proxy for a remote when it is proxied by another remote
--- which is itself configured as a cluster gateway.
-| otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of
-Just proxyuuid -> not $ null $
-concatMap (remoteAnnexClusterGateway . Remote.gitconfig) $
-filter (\p -> Remote.uuid p == proxyuuid) rs
-Nothing -> False
 
 proxyforcluster cu = do
 clusters <- getClusters
@@ -304,6 +312,57 @@ checkCanProxy' (Just proxies) remoteuuid =
 "not configured to proxy for repository " ++ fromUUIDDesc desc
 Nothing -> return $ Left Nothing
 
+{- Remotes that this repository is configured to proxy for.
+-
+- When there are multiple remotes that access the same repository,
+- this picks the lowest cost one that is configured to be used as a proxy.
+-}
+proxyForRemotes :: Annex [Remote]
+proxyForRemotes = do
+myuuid <- getUUID
+(M.lookup myuuid <$> getProxies) >>= \case
+Nothing -> return []
+Just myproxies -> do
+let myproxies' = S.toList myproxies
+rs <- concat . Remote.byCost <$> Remote.remoteList
+myclusters <- annexClusters <$> Annex.getGitConfig
+return $ mapMaybe (canProxyForRemote rs myproxies' myclusters . Remote.uuid) rs
+
+-- Only proxy for a remote when the git configuration allows it.
+-- This is important to prevent changes to the git-annex branch
+-- causing unexpected proxying for remotes.
+canProxyForRemote
+:: [Remote] -- ^ must be sorted by cost
+-> [Proxy]
+-> M.Map RemoteName ClusterUUID
+-> UUID
+-> (Maybe Remote)
+canProxyForRemote rs myproxies myclusters remoteuuid =
+headMaybe $ filter canproxy rs
+where
+canproxy r =
+sameuuid r &&
+proxyisconfigured r &&
+any (isproxyfor r) myproxies
+sameuuid r = Remote.uuid r == remoteuuid
+isproxyfor r p =
+proxyRemoteUUID p == remoteuuid &&
+Remote.name r == proxyRemoteName p
+proxyisconfigured r
+| remoteAnnexProxy (Remote.gitconfig r) = True
+-- Proxy for remotes that are configured as cluster nodes.
+| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True
+-- Proxy for a remote when it is proxied by another remote
+-- which is itself configured as a cluster gateway.
+| otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of
+Just proxyuuid -> not $ null $
+concatMap (remoteAnnexClusterGateway . Remote.gitconfig) $
+filter (\p -> Remote.uuid p == proxyuuid) rs
+Nothing -> False
+
 mkProxyMethods :: ProxyMethods
 mkProxyMethods = ProxyMethods
 { removedContent = \u k -> logChange k u InfoMissing

View file

@@ -93,6 +93,9 @@ exportTreeField = Accepted "exporttree"
 importTreeField :: RemoteConfigField
 importTreeField = Accepted "importtree"
 
+versioningField :: RemoteConfigField
+versioningField = Accepted "versioning"
+
 exportTree :: ParsedRemoteConfig -> Bool
 exportTree = fromMaybe False . getRemoteConfigValue exportTreeField
@@ -100,6 +103,15 @@ exportTree = fromMaybe False . getRemoteConfigValue exportTreeField
 importTree :: ParsedRemoteConfig -> Bool
 importTree = fromMaybe False . getRemoteConfigValue importTreeField
 
+isVersioning :: ParsedRemoteConfig -> Bool
+isVersioning = fromMaybe False . getRemoteConfigValue versioningField
+
+annexObjectsField :: RemoteConfigField
+annexObjectsField = Accepted "annexobjects"
+
+annexObjects :: ParsedRemoteConfig -> Bool
+annexObjects = fromMaybe False . getRemoteConfigValue annexObjectsField
+
 {- Parsers for fields that are common to all special remotes. -}
 commonFieldParsers :: [RemoteConfigFieldParser]
 commonFieldParsers =
@@ -124,6 +136,8 @@ essentialFieldParsers =
 (FieldDesc "export trees of files to this remote")
 , yesNoParser importTreeField (Just False)
 (FieldDesc "import trees of files from this remote")
+, yesNoParser annexObjectsField (Just False)
+(FieldDesc "store other objects in remote along with exported trees")
 ]
 
 autoEnableFieldParser :: RemoteConfigFieldParser

View file

@@ -1,3 +1,20 @@
+git-annex (10.20240831) UNRELEASED; urgency=medium
+
+* export: Added --from option.
+* Special remotes configured with exporttree=yes annexobjects=yes
+can store objects in .git/annex/objects, as well as an exported tree.
+* Support proxying to special remotes configured with
+exporttree=yes annexobjects=yes.
+* post-retrieve: When proxying is enabled for an exporttree=yes
+special remote and the configured remote.name.annex-tracking-branch
+is received, the tree is exported to the special remote.
+* updateproxy, updatecluster: Prevent using an exporttree=yes special
+remote that does not have annexobjects=yes, since it will not work.
+* git-remote-annex: Store objects in exportree=yes special remotes
+in the same paths used by annexobjects=yes.
+
+-- Joey Hess <id@joeyh.name>  Wed, 31 Jul 2024 15:52:03 -0400
+
 git-annex (10.20240808) upstream; urgency=medium
 
 * Remove debug output (to stderr) accidentially included in
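To illustrate the configuration and option added in this changelog entry, a hedged usage sketch (the remote names, directory path, and branch are placeholders; only exporttree=yes, annexobjects=yes, and --from come from this branch, the rest is standard git-annex syntax):

    # a special remote that exports a tree and also stores annex objects
    git annex initremote myexport type=directory directory=/mnt/export encryption=none exporttree=yes annexobjects=yes
    # export a branch to it, filling missing content from another remote
    git annex export master --to myexport --from othersource

For an existing exporttree=yes remote that should be proxied to, the suggestion printed by updateproxy/updatecluster (shown later in this diff) applies: git-annex enableremote myexport annexobjects=yes.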

View file

@@ -1010,14 +1010,13 @@ getKeyExportLocations rmt k = do
 -- inside the .git/annex/objects/ directory in the remote.
 --
 -- The first ExportLocation in the returned list is the one that
--- is the same as the local repository would use. But it's possible
--- that one of the others in the list was used by another repository to
--- upload a git key.
+-- should be used to store a key. But it's possible
+-- that one of the others in the list was used.
 keyExportLocations :: Remote -> Key -> GitConfig -> UUID -> Maybe [ExportLocation]
 keyExportLocations rmt k cfg uuid
 | exportTree (Remote.config rmt) || importTree (Remote.config rmt) =
 Just $ map (\p -> mkExportLocation (".git" P.</> p)) $
-concatMap (`annexLocationsNonBare` k) cfgs
+concatMap (`annexLocationsBare` k) cfgs
 | otherwise = Nothing
 where
 -- When git-annex has not been initialized yet (eg, when cloning),

View file

@@ -1,6 +1,6 @@
 {- git-annex command
 -
-- Copyright 2017-2023 Joey Hess <id@joeyh.name>
+- Copyright 2017-2024 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}
@@ -21,6 +21,7 @@ import Git.Types
 import Git.FilePath
 import Git.Sha
 import qualified Remote
+import qualified Types.Remote as Remote
 import Types.Remote
 import Types.Export
 import Annex.Export
@@ -29,6 +30,7 @@ import Annex.Transfer
 import Annex.CatFile
 import Annex.FileMatcher
 import Annex.RemoteTrackingBranch
+import Annex.SpecialRemote.Config
 import Logs.Location
 import Logs.Export
 import Logs.PreferredContent
@@ -41,6 +43,7 @@ import Utility.Matcher
 import qualified Data.ByteString.Char8 as S8
 import qualified Data.ByteString.Lazy as L
 import qualified Data.Map as M
+import qualified Data.Set as S
 import Control.Concurrent
 
 cmd :: Command
@@ -53,6 +56,7 @@ data ExportOptions = ExportOptions
 { exportTreeish :: Git.Ref
 -- ^ can be a tree, a branch, a commit, or a tag
 , exportRemote :: DeferredParse Remote
+, sourceRemote :: [DeferredParse Remote]
 , exportTracking :: Bool
 }
 
@@ -60,6 +64,7 @@ optParser :: CmdParamsDesc -> Parser ExportOptions
 optParser _ = ExportOptions
 <$> (Git.Ref <$> parsetreeish)
 <*> (mkParseRemoteOption <$> parseToOption)
+<*> many (mkParseRemoteOption <$> parseFromOption)
 <*> parsetracking
 where
 parsetreeish = argument str
@@ -82,6 +87,9 @@ seek o = startConcurrency commandStages $ do
 unlessM (isExportSupported r) $
 giveup "That remote does not support exports."
 
+srcrs <- concat . Remote.byCost
+<$> mapM getParsed (sourceRemote o)
+
 -- handle deprecated option
 when (exportTracking o) $
 setConfig (remoteAnnexConfig r "tracking-branch")
@@ -92,12 +100,15 @@
 inRepo (Git.Ref.tree (exportTreeish o))
 mtbcommitsha <- getExportCommit r (exportTreeish o)
 
+seekExport r tree mtbcommitsha srcrs
+
+seekExport :: Remote -> ExportFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> [Remote] -> CommandSeek
+seekExport r tree mtbcommitsha srcrs = do
 db <- openDb (uuid r)
 writeLockDbWhile db $ do
 changeExport r db tree
 unlessM (Annex.getRead Annex.fast) $ do
-void $ fillExport r db tree mtbcommitsha
+void $ fillExport r db tree mtbcommitsha srcrs
 closeDb db
 
 -- | When the treeish is a branch like master or refs/heads/master
@@ -150,16 +161,15 @@ changeExport r db (ExportFiltered new) = do
 [oldtreesha] -> do
 diffmap <- mkDiffMap oldtreesha new db
 let seekdiffmap a = mapM_ a (M.toList diffmap)
--- Rename old files to temp, or delete.
-let deleteoldf = \ek oldf -> commandAction $
-startUnexport' r db oldf ek
+let disposeoldf = \ek oldf -> commandAction $
+startDispose r db oldf ek
 seekdiffmap $ \case
 (ek, (oldf:oldfs, _newf:_)) -> do
 commandAction $
 startMoveToTempName r db oldf ek
-forM_ oldfs (deleteoldf ek)
+forM_ oldfs (disposeoldf ek)
 (ek, (oldfs, [])) ->
-forM_ oldfs (deleteoldf ek)
+forM_ oldfs (disposeoldf ek)
 (_ek, ([], _)) -> noop
 waitForAllRunningCommandActions
 -- Rename from temp to new files.
@@ -237,8 +247,8 @@ newtype AllFilled = AllFilled { fromAllFilled :: Bool }
 --
 -- Once all exported files have reached the remote, updates the
 -- remote tracking branch.
-fillExport :: Remote -> ExportHandle -> ExportFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> Annex Bool
-fillExport r db (ExportFiltered newtree) mtbcommitsha = do
+fillExport :: Remote -> ExportHandle -> ExportFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> [Remote] -> Annex Bool
+fillExport r db (ExportFiltered newtree) mtbcommitsha srcrs = do
 (l, cleanup) <- inRepo $ Git.LsTree.lsTree
 Git.LsTree.LsTreeRecursive
 (Git.LsTree.LsTreeLong False)
@@ -246,7 +256,7 @@ fillExport r db (ExportFiltered newtree) mtbcommitsha = do
 cvar <- liftIO $ newMVar (FileUploaded False)
 allfilledvar <- liftIO $ newMVar (AllFilled True)
 commandActions $
-map (startExport r db cvar allfilledvar) l
+map (startExport r srcrs db cvar allfilledvar) l
 void $ liftIO $ cleanup
 waitForAllRunningCommandActions
@@ -259,8 +269,8 @@
 liftIO $ fromFileUploaded <$> takeMVar cvar
 
-startExport :: Remote -> ExportHandle -> MVar FileUploaded -> MVar AllFilled -> Git.LsTree.TreeItem -> CommandStart
-startExport r db cvar allfilledvar ti = do
+startExport :: Remote -> [Remote] -> ExportHandle -> MVar FileUploaded -> MVar AllFilled -> Git.LsTree.TreeItem -> CommandStart
+startExport r srcrs db cvar allfilledvar ti = do
 ek <- exportKey (Git.LsTree.sha ti)
 stopUnless (notrecordedpresent ek) $
 starting ("export " ++ name r) ai si $
@@ -268,7 +278,7 @@ startExport r db cvar allfilledvar ti = do
 ( next $ cleanupExport r db ek loc False
 , do
 liftIO $ modifyMVar_ cvar (pure . const (FileUploaded True))
-performExport r db ek af (Git.LsTree.sha ti) loc allfilledvar
+performExport r srcrs db ek af (Git.LsTree.sha ti) loc allfilledvar
 )
 where
 loc = mkExportLocation f
@@ -291,26 +301,10 @@ startExport r db cvar allfilledvar ti = do
 else notElem (uuid r) <$> loggedLocations ek
 )
 
-performExport :: Remote -> ExportHandle -> Key -> AssociatedFile -> Sha -> ExportLocation -> MVar AllFilled -> CommandPerform
-performExport r db ek af contentsha loc allfilledvar = do
-let storer = storeExport (exportActions r)
+performExport :: Remote -> [Remote] -> ExportHandle -> Key -> AssociatedFile -> Sha -> ExportLocation -> MVar AllFilled -> CommandPerform
+performExport r srcrs db ek af contentsha loc allfilledvar = do
 sent <- tryNonAsync $ if not (isGitShaKey ek)
-then ifM (inAnnex ek)
-( notifyTransfer Upload af $
--- alwaysUpload because the same key
--- could be used for more than one export
--- location, and concurrently uploading
--- of the content should still be allowed.
-alwaysUpload (uuid r) ek af Nothing stdRetry $ \pm -> do
-let rollback = void $
-performUnexport r db [ek] loc
-sendAnnex ek Nothing rollback $ \f _sz ->
-Remote.action $
-storer f ek loc pm
-, do
-showNote "not available"
-return False
-)
+then tryrenameannexobject $ sendannexobject
 -- Sending a non-annexed file.
 else withTmpFile "export" $ \tmp h -> do
 b <- catObject contentsha
@@ -327,6 +321,65 @@ performExport r db ek af contentsha loc allfilledvar = do
 Left err -> do
 failedsend
 throwM err
+where
+storer = storeExport (exportActions r)
+sendannexobject = ifM (inAnnex ek)
+( sendlocalannexobject
+, do
+locs <- S.fromList <$> loggedLocations ek
+case filter (\sr -> S.member (Remote.uuid sr) locs) srcrs of
+[] -> do
+showNote "not available"
+return False
+(srcr:_) -> getsendannexobject srcr
+)
+sendlocalannexobject = sendwith $ \p -> do
+let rollback = void $
+performUnexport r db [ek] loc
+sendAnnex ek Nothing rollback $ \f _sz ->
+Remote.action $
+storer f ek loc p
+sendwith a =
+notifyTransfer Upload af $
+-- alwaysUpload because the same key
+-- could be used for more than one export
+-- location, and concurrently uploading
+-- of the content should still be allowed.
+alwaysUpload (uuid r) ek af Nothing stdRetry a
+-- Similar to Command.Move.fromToPerform, use a regular download
+-- of a local copy, lock early, and drop the local copy after sending.
+getsendannexobject srcr = do
+showAction $ UnquotedString $ "from " ++ Remote.name srcr
+ifM (notifyTransfer Download af $ download srcr ek af stdRetry)
+( lockContentForRemoval ek (return False) $ \contentlock -> do
+showAction $ UnquotedString $ "to " ++ Remote.name r
+sendlocalannexobject
+`finally` removeAnnex contentlock
+, return False
+)
+tryrenameannexobject fallback
+| annexObjects (Remote.config r) = do
+case renameExport (exportActions r) of
+Just renameaction -> do
+locs <- loggedLocations ek
+gc <- Annex.getGitConfig
+let objloc = exportAnnexObjectLocation gc ek
+if Remote.uuid r `elem` locs
+then tryNonAsync (renameaction ek objloc loc) >>= \case
+Right (Just ()) -> do
+liftIO $ addExportedLocation db ek loc
+liftIO $ flushDbQueue db
+return True
+Left _err -> fallback
+Right Nothing -> fallback
+else fallback
+Nothing -> fallback
+| otherwise = fallback
 
 cleanupExport :: Remote -> ExportHandle -> Key -> ExportLocation -> Bool -> CommandCleanup
 cleanupExport r db ek loc sent = do
@@ -348,16 +401,6 @@ startUnexport r db f shas = do
 ai = ActionItemTreeFile f'
 si = SeekInput []
 
-startUnexport' :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
-startUnexport' r db f ek =
-starting ("unexport " ++ name r) ai si $
-performUnexport r db [ek] loc
-where
-loc = mkExportLocation f'
-f' = getTopFilePath f
-ai = ActionItemTreeFile f'
-si = SeekInput []
-
 -- Unlike a usual drop from a repository, this does not check that
 -- numcopies is satisfied before removing the content. Typically an export
 -- remote is untrusted, so would not count as a copy anyway.
@@ -379,18 +422,43 @@ cleanupUnexport r db eks loc = do
 removeExportedLocation db ek loc
 flushDbQueue db
 
--- A versionedExport remote supports removeExportLocation to remove
+-- A versioned remote supports removeExportLocation to remove
 -- the file from the exported tree, but still retains the content
 -- and allows retrieving it.
-unless (versionedExport (exportActions r)) $ do
+unless (isVersioning (Remote.config r)) $ do
 remaininglocs <- liftIO $
 concat <$> forM eks (getExportedLocation db)
 when (null remaininglocs) $
 forM_ eks $ \ek ->
-logChange ek (uuid r) InfoMissing
+-- When annexobject=true, a key that
+-- was unexported may still be present
+-- on the remote.
+if annexObjects (Remote.config r)
+then tryNonAsync (checkPresent r ek) >>= \case
+Right False ->
+logChange ek (uuid r) InfoMissing
+_ -> noop
+else logChange ek (uuid r) InfoMissing
 
 removeEmptyDirectories r db loc eks
 
+-- Dispose of an old exported file by either unexporting it, or by moving
+-- it to the annexobjects location.
+startDispose :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
+startDispose r db f ek =
+starting ("unexport " ++ name r) ai si $
+if annexObjects (Remote.config r) && not (isGitShaKey ek)
+then do
+gc <- Annex.getGitConfig
+performRename False r db ek loc
+(exportAnnexObjectLocation gc ek)
+else performUnexport r db [ek] loc
+where
+loc = mkExportLocation f'
+f' = getTopFilePath f
+ai = ActionItemTreeFile f'
+si = SeekInput []
+
 startRecoverIncomplete :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart
 startRecoverIncomplete r db sha oldf
 | sha `elem` nullShas = stop
@@ -408,7 +476,7 @@ startRecoverIncomplete r db sha oldf
 startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
 startMoveToTempName r db f ek = case renameExport (exportActions r) of
 Just _ -> starting ("rename " ++ name r) ai si $
-performRename r db ek loc tmploc
+performRename True r db ek loc tmploc
 Nothing -> starting ("unexport " ++ name r) ai' si $
 performUnexport r db [ek] loc
 where
@@ -424,7 +492,7 @@ startMoveFromTempName :: Remote -> ExportHandle -> Key -> TopFilePath -> Command
 startMoveFromTempName r db ek f = case renameExport (exportActions r) of
 Just _ -> stopUnless (liftIO $ elem tmploc <$> getExportedLocation db ek) $
 starting ("rename " ++ name r) ai si $
-performRename r db ek tmploc loc
+performRename True r db ek tmploc loc
 Nothing -> starting ("unexport " ++ name r) ai' si $
 performUnexport r db [ek] tmploc
 where
@@ -436,12 +504,14 @@ startMoveFromTempName r db ek f = case renameExport (exportActions r) of
 ai' = ActionItemTreeFile (fromExportLocation tmploc)
 si = SeekInput []
 
-performRename :: Remote -> ExportHandle -> Key -> ExportLocation -> ExportLocation -> CommandPerform
-performRename r db ek src dest = case renameExport (exportActions r) of
+performRename :: Bool -> Remote -> ExportHandle -> Key -> ExportLocation -> ExportLocation -> CommandPerform
+performRename warnonfail r db ek src dest = case renameExport (exportActions r) of
 Just renameaction -> tryNonAsync (renameaction ek src dest) >>= \case
 Right (Just ()) -> next $ cleanupRename r db ek src dest
 Left err -> do
-warning $ UnquotedString $ "rename failed (" ++ show err ++ "); deleting instead"
+when warnonfail $
+warning $ UnquotedString $
+"rename failed (" ++ show err ++ "); deleting instead"
 fallbackdelete
 Right Nothing -> fallbackdelete
 -- remote does not support renaming

View file

@@ -1,6 +1,6 @@
 {- git-annex command
 -
-- Copyright 2017 Joey Hess <id@joeyh.name>
+- Copyright 2017-2024 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}
@@ -11,10 +11,23 @@ module Command.PostReceive where
 import Command
 import qualified Annex
-import Git.Types
 import Annex.UpdateInstead
 import Annex.CurrentBranch
 import Command.Sync (mergeLocal, prepMerge, mergeConfig, SyncOptions(..))
+import Annex.Proxy
+import Remote
+import qualified Types.Remote as Remote
+import Config
+import Git.Types
+import Git.Sha
+import Git.FilePath
+import qualified Git.Ref
+import Command.Export (filterExport, getExportCommit, seekExport)
+import Command.Sync (syncBranch)
+
+import qualified Data.Set as S
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as B8
 
 -- This does not need to modify the git-annex branch to update the
 -- work tree, but auto-initialization might change the git-annex branch.
@@ -28,9 +41,62 @@ cmd = noCommit $
 (withParams seek)
 
 seek :: CmdParams -> CommandSeek
-seek _ = whenM needUpdateInsteadEmulation $ do
+seek _ = do
 fixPostReceiveHookEnv
-commandAction updateInsteadEmulation
+whenM needUpdateInsteadEmulation $
+commandAction updateInsteadEmulation
+proxyExportTree
+
+updateInsteadEmulation :: CommandStart
+updateInsteadEmulation = do
+prepMerge
+let o = def { notOnlyAnnexOption = True }
+mc <- mergeConfig False
+mergeLocal mc o =<< getCurrentBranch
+
+proxyExportTree :: CommandSeek
+proxyExportTree = do
+rbs <- catMaybes <$> (mapM canexport =<< proxyForRemotes)
+unless (null rbs) $ do
+pushedbranches <- liftIO $
+S.fromList . map snd . parseHookInput
+<$> B.hGetContents stdin
+let waspushed (r, (b, d))
+| S.member (Git.Ref.branchRef b) pushedbranches =
+Just (r, b, d)
+| S.member (Git.Ref.branchRef (syncBranch b)) pushedbranches =
+Just (r, syncBranch b, d)
+| otherwise = Nothing
+case headMaybe (mapMaybe waspushed rbs) of
+Just (r, b, Nothing) -> go r b
+Just (r, b, Just d) -> go r $
+Git.Ref.branchFileRef b (getTopFilePath d)
+Nothing -> noop
+where
+canexport r = case remoteAnnexTrackingBranch (Remote.gitconfig r) of
+Nothing -> return Nothing
+Just branch ->
+ifM (isExportSupported r)
+( return (Just (r, splitRemoteAnnexTrackingBranchSubdir branch))
+, return Nothing
+)
+go r b = inRepo (Git.Ref.tree b) >>= \case
+Nothing -> return ()
+Just t -> do
+tree <- filterExport r t
+mtbcommitsha <- getExportCommit r b
+seekExport r tree mtbcommitsha [r]
+
+parseHookInput :: B.ByteString -> [((Sha, Sha), Ref)]
+parseHookInput = mapMaybe parse . B8.lines
+where
+parse l = case B8.words l of
+(oldb:newb:refb:[]) -> do
+old <- extractSha oldb
+new <- extractSha newb
+return ((old, new), Ref refb)
+_ -> Nothing
 
 {- When run by the post-receive hook, the cwd is the .git directory,
 - and GIT_DIR=. It's not clear why git does this.
@@ -50,9 +116,3 @@ fixPostReceiveHookEnv = do
 }
 _ -> noop
-
-updateInsteadEmulation :: CommandStart
-updateInsteadEmulation = do
-prepMerge
-let o = def { notOnlyAnnexOption = True }
-mc <- mergeConfig False
-mergeLocal mc o =<< getCurrentBranch
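In rough terms, the post-receive changes above do the following (a summary of the added code, not an authoritative description): the hook reads the pushed refs from stdin via parseHookInput, looks up the remotes this repository proxies for (proxyForRemotes), and when a pushed branch matches a remote's configured remote.<name>.annex-tracking-branch (or its git-annex sync branch), it filters the corresponding tree with filterExport and runs seekExport to export it to that special remote.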

View file

@@ -28,6 +28,7 @@ module Command.Sync (
 parseUnrelatedHistoriesOption,
 SyncOptions(..),
 OperationMode(..),
+syncBranch,
 ) where
 
 import Command
@@ -87,8 +88,6 @@ import Utility.Tuple
 
 import Control.Concurrent.MVar
 import qualified Data.Map as M
-import qualified Data.ByteString as S
-import Data.Char
 
 cmd :: Command
 cmd = withAnnexOptions [jobsOption, backendOption] $
@@ -307,15 +306,15 @@ seek' o = startConcurrency transferStages $ do
 -- repositories, in case that lets content
 -- be dropped from other repositories.
 exportedcontent <- withbranch $
-seekExportContent (Just o)
-(filter isExport contentremotes)
+seekExportContent (Just o) contentremotes
 
 -- Sync content with remotes, including
 -- importing from import remotes (since
 -- importing only downloads new files not
 -- old files)
 let shouldsynccontent r
-| isExport r && not (isImport r) = False
+| isExport r && not (isImport r)
+&& not (exportHasAnnexObjects r) = False
 | otherwise = True
 syncedcontent <- withbranch $
 seekSyncContent o
@@ -944,7 +943,8 @@ syncFile o ebloom rs af k = do
 wantput r
 | pushOption o == False && operationMode o /= SatisfyMode = return False
 | Remote.readonly r || remoteAnnexReadOnly (Remote.gitconfig r) = return False
-| isExport r || isImport r = return False
+| isImport r && not (isExport r) = return False
+| isExport r && not (exportHasAnnexObjects r) = return False
 | isThirdPartyPopulated r = return False
 | otherwise = wantGetBy True (Just k) af (Remote.uuid r)
 handleput lack inhere
@@ -975,7 +975,13 @@ syncFile o ebloom rs af k = do
 - Returns True if any file transfers were made.
 -}
 seekExportContent :: Maybe SyncOptions -> [Remote] -> CurrBranch -> Annex Bool
-seekExportContent o rs (mcurrbranch, madj)
+seekExportContent o rs currbranch =
+seekExportContent' o (filter canexportcontent rs) currbranch
+where
+canexportcontent r = isExport r && not (isProxied r)
+
+seekExportContent' :: Maybe SyncOptions -> [Remote] -> CurrBranch -> Annex Bool
+seekExportContent' o rs (mcurrbranch, madj)
 | null rs = return False
 | otherwise = do
 -- Propagate commits from the adjusted branch, so that
@@ -1013,7 +1019,7 @@ seekExportContent o rs (mcurrbranch, madj)
 | tree == currtree -> do
 filteredtree <- Command.Export.filterExport r tree
 Command.Export.changeExport r db filteredtree
-Command.Export.fillExport r db filteredtree mtbcommitsha
+Command.Export.fillExport r db filteredtree mtbcommitsha []
 | otherwise -> cannotupdateexport r db Nothing False
 (Nothing, _, _) -> cannotupdateexport r db (Just (Git.fromRef b ++ " does not exist")) True
 (_, Nothing, _) -> cannotupdateexport r db (Just "no branch is currently checked out") True
@@ -1056,7 +1062,7 @@ seekExportContent o rs (mcurrbranch, madj)
 -- filling in any files that did not get transferred
 -- to the existing exported tree.
 let filteredtree = Command.Export.ExportFiltered tree
-Command.Export.fillExport r db filteredtree mtbcommitsha
+Command.Export.fillExport r db filteredtree mtbcommitsha []
 fillexistingexport r _ _ _ = do
 warnExportImportConflict r
 return False
@@ -1147,14 +1153,11 @@ isExport = exportTree . Remote.config
 isImport :: Remote -> Bool
 isImport = importTree . Remote.config
 
+isProxied :: Remote -> Bool
+isProxied = isJust . remoteAnnexProxiedBy . Remote.gitconfig
+
+exportHasAnnexObjects :: Remote -> Bool
+exportHasAnnexObjects = annexObjects . Remote.config
+
 isThirdPartyPopulated :: Remote -> Bool
 isThirdPartyPopulated = Remote.thirdPartyPopulated . Remote.remotetype
-
-splitRemoteAnnexTrackingBranchSubdir :: Git.Ref -> (Git.Ref, Maybe TopFilePath)
-splitRemoteAnnexTrackingBranchSubdir tb = (branch, subdir)
-where
-(b, p) = separate' (== (fromIntegral (ord ':'))) (Git.fromRef' tb)
-branch = Git.Ref b
-subdir = if S.null p
-then Nothing
-else Just (asTopFilePath p)

View file

@@ -34,10 +34,17 @@ seek = withNothing $ do
 start :: CommandStart
 start = startingCustomOutput (ActionItemOther Nothing) $ do
 rs <- R.remoteList
-let getnode r = do
-clusternames <- remoteAnnexClusterNode (R.gitconfig r)
-return $ M.fromList $ zip clusternames (repeat (S.singleton r))
-let myclusternodes = M.unionsWith S.union (mapMaybe getnode rs)
+let getnode r = case remoteAnnexClusterNode (R.gitconfig r) of
+Nothing -> return Nothing
+Just [] -> return Nothing
+Just clusternames ->
+ifM (Command.UpdateProxy.checkCanProxy r "Cannot use this special remote as a cluster node.")
+( return $ Just $ M.fromList $
+zip clusternames (repeat (S.singleton r))
+, return Nothing
+)
+myclusternodes <- M.unionsWith S.union . catMaybes
+<$> mapM getnode rs
 myclusters <- annexClusters <$> Annex.getGitConfig
 recordedclusters <- getClusters
 descs <- R.uuidDescriptions

View file

@@ -14,6 +14,7 @@ import Logs.Cluster
 import Annex.UUID
 import qualified Remote as R
 import qualified Types.Remote as R
+import Annex.SpecialRemote.Config
 import Utility.SafeOutput
 
 import qualified Data.Map as M
@@ -30,8 +31,8 @@ seek = withNothing (commandAction start)
 start :: CommandStart
 start = startingCustomOutput (ActionItemOther Nothing) $ do
 rs <- R.remoteList
-let remoteproxies = S.fromList $ map mkproxy $
-filter (isproxy . R.gitconfig) rs
+remoteproxies <- S.fromList . map mkproxy
+<$> filterM isproxy rs
 clusterproxies <- getClusterProxies remoteproxies
 let proxies = S.union remoteproxies clusterproxies
 u <- getUUID
@@ -54,9 +55,33 @@ start = startingCustomOutput (ActionItemOther Nothing) $ do
 "Stopped proxying for " ++ proxyRemoteName p
 _ -> noop
-isproxy c = remoteAnnexProxy c || not (null (remoteAnnexClusterNode c))
 mkproxy r = Proxy (R.uuid r) (R.name r)
+isproxy r
+| remoteAnnexProxy (R.gitconfig r) || not (null (remoteAnnexClusterNode (R.gitconfig r))) =
+checkCanProxy r "Cannot proxy to this special remote."
+| otherwise = pure False
+
+checkCanProxy :: Remote -> String -> Annex Bool
+checkCanProxy r cannotmessage =
+ifM (R.isExportSupported r)
+( if annexObjects (R.config r)
+then pure True
+else do
+warnannexobjects
+pure False
+, pure True
+)
+where
+warnannexobjects = warning $ UnquotedString $ unwords
+[ R.name r
+, "is configured with exporttree=yes, but without"
+, "annexobjects=yes."
+, cannotmessage
+, "Suggest you run: git-annex enableremote"
+, R.name r
+, "annexobjects=yes"
+]
 
 -- Automatically proxy nodes of any cluster this repository is configured
 -- to serve as a gateway for. Also proxy other cluster nodes that are

View file

@@ -26,8 +26,12 @@ import Types.Availability
 import Types.GitConfig
 import Types.RemoteConfig
 import Git.Types
+import Git.FilePath
 import Annex.SpecialRemote.Config
 
+import Data.Char
+import qualified Data.ByteString as S
+
 {- Looks up a setting in git config. This is not as efficient as using the
 - GitConfig type. -}
 getConfig :: ConfigKey -> ConfigValue -> Annex ConfigValue
@@ -99,3 +103,12 @@ pidLockFile = ifM (annexPidLock <$> Annex.getGitConfig)
 #else
 pidLockFile = pure Nothing
 #endif
+
+splitRemoteAnnexTrackingBranchSubdir :: Git.Ref -> (Git.Ref, Maybe TopFilePath)
+splitRemoteAnnexTrackingBranchSubdir tb = (branch, subdir)
+where
+(b, p) = separate' (== (fromIntegral (ord ':'))) (Git.fromRef' tb)
+branch = Git.Ref b
+subdir = if S.null p
+then Nothing
+else Just (asTopFilePath p)
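For illustration (derived from the definition above, not stated in the patch): splitRemoteAnnexTrackingBranchSubdir splits a remote.<name>.annex-tracking-branch value on the first ':', so "master:photos" yields the branch "master" with the subdirectory "photos", while a plain "master" yields no subdirectory.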

View file

@@ -91,7 +91,6 @@ gen r u rc gc rs = do
 { storeExport = storeExportM serial adir
 , retrieveExport = retrieveExportM serial adir
 , removeExport = removeExportM serial adir
-, versionedExport = False
 , checkPresentExport = checkPresentExportM serial adir
 , removeExportDirectory = Just $ removeExportDirectoryM serial adir
 , renameExport = Just $ renameExportM serial adir

View file

@@ -107,7 +107,6 @@ gen r u rc gc rs = do
 { storeExport = storeExportM dir cow
 , retrieveExport = retrieveExportM dir cow
 , removeExport = removeExportM dir
-, versionedExport = False
 , checkPresentExport = checkPresentExportM dir
 -- Not needed because removeExportLocation
 -- auto-removes empty directories.

View file

@@ -95,7 +95,6 @@ gen rt externalprogram r u rc gc rs
 { storeExport = storeExportM external
 , retrieveExport = retrieveExportM external
 , removeExport = removeExportM external
-, versionedExport = False
 , checkPresentExport = checkPresentExportM external
 , removeExportDirectory = Just $ removeExportDirectoryM external
 , renameExport = Just $ renameExportM external

View file

@@ -84,6 +84,7 @@ remote = RemoteType
 , configParser = mkRemoteConfigParser
 [ optionalStringParser locationField
 (FieldDesc "url of git remote to remember with special remote")
+, yesNoParser versioningField (Just False) HiddenField
 ]
 , setup = gitSetup
 , exportSupported = exportUnsupported
@@ -229,7 +230,9 @@ gen r u rc gc rs
 , gitconfig = gc
 , readonly = Git.repoIsHttp r && not (isP2PHttp' gc)
 , appendonly = False
-, untrustworthy = False
+, untrustworthy = isJust (remoteAnnexProxiedBy gc)
+&& (exportTree c || importTree c)
+&& not (isVersioning c)
 , availability = repoAvail r
 , remotetype = remote
 , mkUnavailable = unavailable r u rc gc rs

View file

@@ -1,25 +1,28 @@
 {- Helper to make remotes support export and import (or not).
 -
-- Copyright 2017-2019 Joey Hess <id@joeyh.name>
+- Copyright 2017-2024 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}
 
 {-# LANGUAGE FlexibleInstances, FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
 
 module Remote.Helper.ExportImport where
 
 import Annex.Common
+import qualified Annex
 import Types.Remote
 import Types.Key
 import Types.ProposedAccepted
-import Annex.Verify
 import Remote.Helper.Encryptable (encryptionIsEnabled)
 import qualified Database.Export as Export
 import qualified Database.ContentIdentifier as ContentIdentifier
 import Annex.Export
 import Annex.LockFile
 import Annex.SpecialRemote.Config
+import Annex.Verify
+import Annex.Content
 import Git.Types (fromRef)
 import Logs.Export
 import Logs.ContentIdentifier (recordContentIdentifier)
@@ -39,7 +42,6 @@ instance HasExportUnsupported (ExportActions Annex) where
 , retrieveExport = nope
 , checkPresentExport = \_ _ -> return False
 , removeExport = nope
-, versionedExport = False
 , removeExportDirectory = nope
 , renameExport = Nothing
 }
@@ -123,15 +125,17 @@ adjustExportImport r rs = do
 else importUnsupported
 }
 }
+let annexobjects = isexport && annexObjects (config r)
 if not isexport && not isimport
 then return r'
-else adjustExportImport' isexport isimport r' rs
+else do
+gc <- Annex.getGitConfig
+adjustExportImport' isexport isimport annexobjects r' rs gc
 
-adjustExportImport' :: Bool -> Bool -> Remote -> RemoteStateHandle -> Annex Remote
-adjustExportImport' isexport isimport r rs = do
+adjustExportImport' :: Bool -> Bool -> Bool -> Remote -> RemoteStateHandle -> GitConfig -> Annex Remote
+adjustExportImport' isexport isimport annexobjects r rs gc = do
 dbv <- prepdbv
 ciddbv <- prepciddb
-let versioned = versionedExport (exportActions r)
 return $ r
 { exportActions = if isexport
 then if isimport
@@ -141,52 +145,55 @@ adjustExportImport' isexport isimport r rs = do
 , importActions = if isimport
 then importActions r
 else importUnsupported
-, storeKey = \k af p ->
+, storeKey = \k af o p ->
--- Storing a key on an export could be implemented,
--- but it would perform unnecessary work
+-- Storing a key to an export location could be
+-- implemented, but it would perform unnecessary work
 -- when another repository has already stored the
 -- key, and the local repository does not know
 -- about it. To avoid unnecessary costs, don't do it.
 if thirdpartypopulated
 then giveup "remote is not populated by git-annex"
 else if isexport
-then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
+then if annexobjects
+then storeannexobject k o p
+else giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
 else if isimport
 then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it"
-else storeKey r k af p
-, removeKey = \k ->
--- Removing a key from an export would need to
--- change the tree in the export log to not include
+else storeKey r k af o p
+, removeKey = \proof k ->
+-- Removing a key from an export location would need
+-- to change the tree in the export log to not include
 -- the file. Otherwise, conflicts when removing
 -- files would not be dealt with correctly.
 -- There does not seem to be a good use case for
--- removing a key from an export in any case.
+-- removing a key from an exported tree in any case.
 if thirdpartypopulated
 then giveup "dropping content from this remote is not supported"
 else if isexport
-then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
+then if annexobjects
+then removeannexobject dbv k
+else giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
 else if isimport
 then giveup "dropping content from this remote is not supported because it is configured with importtree=yes"
-else removeKey r k
+else removeKey r proof k
 , lockContent = if versioned
 then lockContent r
 else Nothing
 , retrieveKeyFile = \k af dest p vc ->
-if isimport
+if isimport || isexport
 then supportversionedretrieve k af dest p vc $
-retrieveKeyFileFromImport dbv ciddbv k af dest p
-else if isexport
-then supportversionedretrieve k af dest p vc $
-retrieveKeyFileFromExport dbv k af dest p
-else retrieveKeyFile r k af dest p vc
+supportretrieveannexobject dbv k af dest p $
+retrieveFromImportOrExport (tryexportlocs dbv k) ciddbv k af dest p
+else retrieveKeyFile r k af dest p vc
 , retrieveKeyFileCheap = if versioned
 then retrieveKeyFileCheap r
 else Nothing
 , checkPresent = \k -> if versioned
 then checkPresent r k
 else if isimport
-then anyM (checkPresentImport ciddbv k)
-=<< getanyexportlocs dbv k
+then checkpresentwith k $
+anyM (checkPresentImport ciddbv k)
+=<< getanyexportlocs dbv k
 else if isexport
 -- Check if any of the files a key
 -- was exported to are present. This
@ -197,8 +204,9 @@ adjustExportImport' isexport isimport r rs = do
-- to it. Remotes that have such -- to it. Remotes that have such
-- problems are made untrusted, -- problems are made untrusted,
-- so it's not worried about here. -- so it's not worried about here.
then anyM (checkPresentExport (exportActions r) k) then checkpresentwith k $
=<< getanyexportlocs dbv k anyM (checkPresentExport (exportActions r) k)
=<< getanyexportlocs dbv k
else checkPresent r k else checkPresent r k
-- checkPresent from an export is more expensive -- checkPresent from an export is more expensive
-- than otherwise, so not cheap. Also, this -- than otherwise, so not cheap. Also, this
@ -226,13 +234,21 @@ adjustExportImport' isexport isimport r rs = do
then do then do
ts <- map fromRef . exportedTreeishes ts <- map fromRef . exportedTreeishes
<$> getExport (uuid r) <$> getExport (uuid r)
return (is++[("exporttree", "yes"), ("exportedtree", unwords ts)]) return $ is ++ catMaybes
[ Just ("exporttree", "yes")
, Just ("exportedtree", unwords ts)
, if annexobjects
then Just ("annexobjects", "yes")
else Nothing
]
else return is else return is
return $ if isimport && not thirdpartypopulated return $ if isimport && not thirdpartypopulated
then (is'++[("importtree", "yes")]) then (is'++[("importtree", "yes")])
else is' else is'
} }
where where
versioned = isVersioning (config r)
thirdpartypopulated = thirdPartyPopulated (remotetype r) thirdpartypopulated = thirdPartyPopulated (remotetype r)
-- exportActions adjusted to use the equivalent import actions, -- exportActions adjusted to use the equivalent import actions,
@@ -313,7 +329,7 @@ adjustExportImport' isexport isimport r rs = do
, liftIO $ atomically (readTMVar dbv) , liftIO $ atomically (readTMVar dbv)
) )
getexportinconflict (_, _, v) = v isexportinconflict (_, _, v) = liftIO $ atomically $ readTVar v
updateexportdb db exportinconflict = updateexportdb db exportinconflict =
Export.updateExportTreeFromLog db >>= \case Export.updateExportTreeFromLog db >>= \case
@@ -329,8 +345,8 @@ adjustExportImport' isexport isimport r rs = do
getexportlocs dbv k = do getexportlocs dbv k = do
db <- getexportdb dbv db <- getexportdb dbv
liftIO $ Export.getExportTree db k >>= \case liftIO (Export.getExportTree db k) >>= \case
[] -> ifM (atomically $ readTVar $ getexportinconflict dbv) [] -> ifM (isexportinconflict dbv)
( giveup "unknown export location, likely due to the export conflict" ( giveup "unknown export location, likely due to the export conflict"
, return [] , return []
) )
@@ -349,12 +365,16 @@ adjustExportImport' isexport isimport r rs = do
db <- getciddb ciddbv db <- getciddb ciddbv
liftIO $ ContentIdentifier.getContentIdentifiers db rs k liftIO $ ContentIdentifier.getContentIdentifiers db rs k
retrieveFromImportOrExport getlocs ciddbv k af dest p
| isimport = retrieveFromImport getlocs ciddbv k af dest p
| otherwise = retrieveFromExport getlocs k af dest p
-- Keys can be retrieved using retrieveExport, but since that -- Keys can be retrieved using retrieveExport, but since that
-- retrieves from a path in the remote that another writer could -- retrieves from a path in the remote that another writer could
-- have replaced with content not of the requested key, the content -- have replaced with content not of the requested key, the content
-- has to be strongly verified. -- has to be strongly verified.
retrieveKeyFileFromExport dbv k _af dest p = ifM (isVerifiable k) retrieveFromExport getlocs k _af dest p = ifM (isVerifiable k)
( tryexportlocs dbv k $ \loc -> ( getlocs $ \loc ->
retrieveExport (exportActions r) k loc dest p >>= return . \case retrieveExport (exportActions r) k loc dest p >>= return . \case
UnVerified -> MustVerify UnVerified -> MustVerify
IncompleteVerify iv -> MustFinishIncompleteVerify iv IncompleteVerify iv -> MustFinishIncompleteVerify iv
@@ -362,28 +382,87 @@ adjustExportImport' isexport isimport r rs = do
, giveup $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ " backend" , giveup $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ " backend"
) )
retrieveKeyFileFromImport dbv ciddbv k af dest p = do retrieveFromImport getlocs ciddbv k af dest p = do
cids <- getkeycids ciddbv k cids <- getkeycids ciddbv k
if not (null cids) if not (null cids)
then tryexportlocs dbv k $ \loc -> then getlocs $ \loc ->
snd <$> retrieveExportWithContentIdentifier (importActions r) loc cids dest (Left k) p snd <$> retrieveExportWithContentIdentifier (importActions r) loc cids dest (Left k) p
-- In case a content identifier is somehow missing, -- In case a content identifier is somehow missing,
-- try this instead. -- try this instead.
else if isexport else if isexport
then retrieveKeyFileFromExport dbv k af dest p then retrieveFromExport getlocs k af dest p
else giveup "no content identifier is recorded, unable to retrieve" else giveup "no content identifier is recorded, unable to retrieve"
-- versionedExport remotes have a key/value store, so can use checkpresentwith k a = ifM a
-- the usual retrieveKeyFile, rather than an import/export ( return True
-- variant. However, fall back to that if retrieveKeyFile fails. , if annexobjects
supportversionedretrieve k af dest p vc a then checkpresentannexobject k
| versionedExport (exportActions r) = else return False
retrieveKeyFile r k af dest p vc )
`catchNonAsync` const a
| otherwise = a
checkPresentImport ciddbv k loc = checkPresentImport ciddbv k loc =
checkPresentExportWithContentIdentifier checkPresentExportWithContentIdentifier
(importActions r) (importActions r)
k loc k loc
=<< getkeycids ciddbv k =<< getkeycids ciddbv k
annexobjectlocation k = exportAnnexObjectLocation gc k
checkpresentannexobject k =
checkPresentExport (exportActions r) k (annexobjectlocation k)
storeannexobject k o p = prepSendAnnex' k o >>= \case
Nothing -> giveup "content is not available"
Just (src, _, checkmodified) -> do
let loc = annexobjectlocation k
storeExport (exportActions r) src k loc p
checkmodified >>= \case
Nothing -> return ()
Just err -> do
removeExport (exportActions r) k loc
giveup err
removeannexobject dbv k =
getanyexportlocs dbv k >>= \case
[] -> ifM (isexportinconflict dbv)
( do
warnExportImportConflict r
giveup "Cannot remove content from the remote until the conflict has been resolved."
, removeExport (exportActions r) k (annexobjectlocation k)
)
_ -> giveup "This key is part of the exported tree, so can only be removed by exporting a tree that does not include it."
retrieveannexobject k af dest p =
retrieveFromExport getlocs k af dest p
where
getlocs a = a (annexobjectlocation k)
supportretrieveannexobject dbv k af dest p a
| annexobjects = tryNonAsync a >>= \case
Right res -> return res
Left err -> tryNonAsync (retrieveannexobject k af dest p) >>= \case
Right res -> return res
-- Both failed, so which exception to
-- throw? If there are known export
-- locations, throw the exception from
-- retrieving from the export locations.
-- If there are no known export locations,
-- throw the exception from retrieving from
-- the annexobjects location.
Left err' -> getanyexportlocs dbv k >>= \case
[] -> ifM (isexportinconflict dbv)
( throwM err
, throwM err'
)
_ -> throwM err
| otherwise = a
-- versioned remotes have a key/value store which
-- the usual retrieveKeyFile can be used with, rather than
-- an import/export variant. However, fall back to that
-- if retrieveKeyFile fails.
supportversionedretrieve k af dest p vc a
| versioned =
retrieveKeyFile r k af dest p vc
`catchNonAsync` const a
| otherwise = a

View file

@@ -79,7 +79,6 @@ gen r u rc gc rs = do
{ storeExport = cannotModify { storeExport = cannotModify
, retrieveExport = retriveExportHttpAlso url , retrieveExport = retriveExportHttpAlso url
, removeExport = cannotModify , removeExport = cannotModify
, versionedExport = False
, checkPresentExport = checkPresentExportHttpAlso url , checkPresentExport = checkPresentExportHttpAlso url
, removeExportDirectory = Nothing , removeExportDirectory = Nothing
, renameExport = cannotModify , renameExport = cannotModify

View file

@@ -103,7 +103,6 @@ gen r u rc gc rs = do
{ storeExport = storeExportM o { storeExport = storeExportM o
, retrieveExport = retrieveExportM o , retrieveExport = retrieveExportM o
, removeExport = removeExportM o , removeExport = removeExportM o
, versionedExport = False
, checkPresentExport = checkPresentExportM o , checkPresentExport = checkPresentExportM o
, removeExportDirectory = Just (removeExportDirectoryM o) , removeExportDirectory = Just (removeExportDirectoryM o)
, renameExport = Just $ renameExportM o , renameExport = Just $ renameExportM o

View file

@@ -143,10 +143,7 @@ storageclassField = Accepted "storageclass"
fileprefixField :: RemoteConfigField fileprefixField :: RemoteConfigField
fileprefixField = Accepted "fileprefix" fileprefixField = Accepted "fileprefix"
versioningField :: RemoteConfigField
versioningField = Accepted "versioning"
publicField :: RemoteConfigField publicField :: RemoteConfigField
publicField = Accepted "public" publicField = Accepted "public"
@@ -224,7 +221,6 @@ gen r u rc gc rs = do
{ storeExport = storeExportS3 hdl this rs info magic { storeExport = storeExportS3 hdl this rs info magic
, retrieveExport = retrieveExportS3 hdl this info , retrieveExport = retrieveExportS3 hdl this info
, removeExport = removeExportS3 hdl this rs info , removeExport = removeExportS3 hdl this rs info
, versionedExport = versioning info
, checkPresentExport = checkPresentExportS3 hdl this info , checkPresentExport = checkPresentExportS3 hdl this info
-- S3 does not have directories. -- S3 does not have directories.
, removeExportDirectory = Nothing , removeExportDirectory = Nothing

View file

@@ -101,7 +101,6 @@ gen r u rc gc rs = do
, retrieveExport = retrieveExportDav hdl , retrieveExport = retrieveExportDav hdl
, checkPresentExport = checkPresentExportDav hdl this , checkPresentExport = checkPresentExportDav hdl this
, removeExport = removeExportDav hdl , removeExport = removeExportDav hdl
, versionedExport = False
, removeExportDirectory = Just $ , removeExportDirectory = Just $
removeExportDirectoryDav hdl removeExportDirectoryDav hdl
, renameExport = Just $ renameExportDav hdl , renameExport = Just $ renameExportDav hdl

View file

@@ -34,7 +34,7 @@ import Control.DeepSeq
-- PINNED in memory which caused memory fragmentation and excessive memory -- PINNED in memory which caused memory fragmentation and excessive memory
-- use. -- use.
newtype ExportLocation = ExportLocation S.ShortByteString newtype ExportLocation = ExportLocation S.ShortByteString
deriving (Show, Eq, Generic) deriving (Show, Eq, Generic, Ord)
instance NFData ExportLocation instance NFData ExportLocation

View file

@@ -278,11 +278,6 @@ data ExportActions a = ExportActions
-- Can throw exception if unable to access remote, or if remote -- Can throw exception if unable to access remote, or if remote
-- refuses to remove the content. -- refuses to remove the content.
, removeExport :: Key -> ExportLocation -> a () , removeExport :: Key -> ExportLocation -> a ()
-- Set when the remote is versioned, so once a Key is stored
-- to an ExportLocation, a subsequent deletion of that
-- ExportLocation leaves the key still accessible to retrieveKeyFile
-- and checkPresent.
, versionedExport :: Bool
-- Removes an exported directory. Typically the directory will be -- Removes an exported directory. Typically the directory will be
-- empty, but it could possibly contain files or other directories, -- empty, but it could possibly contain files or other directories,
-- and it's ok to delete those (but not required to). -- and it's ok to delete those (but not required to).

View file

@@ -189,8 +189,9 @@ the special remote can reply with `UNSUPPORTED-REQUEST`.
a list of settings with descriptions. Note that the user is not required a list of settings with descriptions. Note that the user is not required
to provide all the settings listed here. A block of responses to provide all the settings listed here. A block of responses
can be made to this, which must always end with `CONFIGEND`. can be made to this, which must always end with `CONFIGEND`.
(Do not include settings like "encryption" that are common to all external (Do not include config like "encryption" that are common to all external
special remotes.) special remotes. Also avoid including a config named "versioning"
unless using it as described in the [[export_and_import_appendix]].)
* `CONFIG Name Description` * `CONFIG Name Description`
Indicates the name and description of a config setting. The description Indicates the name and description of a config setting. The description
should be reasonably short. Example: should be reasonably short. Example:

View file

@@ -153,13 +153,6 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`.
Indicates that `IMPORTKEY` can be used. Indicates that `IMPORTKEY` can be used.
* `IMPORTKEYSUPPORTED-FAILURE` * `IMPORTKEYSUPPORTED-FAILURE`
Indicates that `IMPORTKEY` cannot be used. Indicates that `IMPORTKEY` cannot be used.
* `VERSIONED`
Used to check if the special remote is versioned.
Note that this request may be made before or after `PREPARE`.
* `ISVERSIONED`
Indicates that the remote is versioned.
* `NOTVERSIONED`
Indicates that the remote is not versioned.
* `LISTIMPORTABLECONTENTS` * `LISTIMPORTABLECONTENTS`
Used to get a list of all the files that are stored in the special Used to get a list of all the files that are stored in the special
remote. A block of responses remote. A block of responses
@@ -178,10 +171,9 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`.
block of responses. This can be repeated any number of times block of responses. This can be repeated any number of times
(indicating a branching history), and histories can also (indicating a branching history), and histories can also
be nested multiple levels deep. be nested multiple levels deep.
This should only be used when the remote supports using This should only be a response when the remote supports using
"TRANSFER RECEIVE Key" to retrieve historical versions of files. "TRANSFER RECEIVE Key" to retrieve historical versions of files,
And, it should only be used when the remote replies `ISVERSIONED` and when "GETCONFIG versioning" yields "VALUE TRUE".
to the `VERSIONED` message.
* `END` * `END`
Indicates the end of a block of responses. Indicates the end of a block of responses.
* `LOCATION Name` * `LOCATION Name`

View file

@@ -545,6 +545,10 @@ it pick which of multiple branches to export?
Perhaps configure the annex-tracking-branch in the git-annex branch? Perhaps configure the annex-tracking-branch in the git-annex branch?
That might be generally useful when working with exporttree=yes remotes. That might be generally useful when working with exporttree=yes remotes.
Or simply configure remote.foo.annex-tracking-branch on the proxy.
This may not meet all use cases, but it's simple and seems like a
reasonable first step.
The first two approaches also have a complication when a key is sent to The first two approaches also have a complication when a key is sent to
the proxy that is not part of the configured annex-tracking-branch. What the proxy that is not part of the configured annex-tracking-branch. What
does the proxy do with it? There seem to be three possibilities: does the proxy do with it? There seem to be three possibilities:
@@ -610,19 +614,35 @@ were not accessible when it is accessed directly rather than via the proxy.
Simplified design for proxying to exporttree=yes, if those remotes can Simplified design for proxying to exporttree=yes, if those remotes can
store any key: store any key:
* Configure annex-tracking-branch for the proxy in the git-annex branch. * Configure annex-tracking-branch in the proxy's git config.
(For the proxy as a whole, or for specific exporttree=yes repos behind
it?)
* Then the user's workflow is simply: `git-annex push` * Then the user's workflow is simply: `git-annex push`
* The proxy handles PUT/GET/REMOVE of a key that is not in the * The proxy handles PUT by always storing to the special remote's
annex-tracking branch that it currently knows about, by using .git/annex/objects/ location, not updating the exported tree.
the special remote's .git/annex/objects/ location. * The proxy allows REMOVE from the special remote's
* Upon receiving a new annex-tracking-branch or any transfer of a key .git/annex/objects/ location, but not removal of keys
used in the current annex-tracking-branch, the proxy can update that are in the currently exported tree.
the exporttree=yes remote. This needs to happen incrementally, * When `git-annex post-receive` is run by the post-receive hook
eg upon receiving a key, just proxy it on to the exporttree=yes remote, and the annex-tracking-branch has been updated, it exports
and update the export database. Once all keys are received, update the tree to the special remote.
the git-annex branch to indicate a new tree has been exported. (But, `git-annex push` sends the updated tree first, so
this will often be an incomplete export.)
* When there is an incomplete export and a key is received
that is part of that export, check if it is the *last* key
that is needed to complete the export. If so, export the tree to the
special remote again.
(This avoids overhead and complication of incrementally updating
the export. It relies on the special remote supporting renameExport.
Incrementally updating the export might be worth doing eventually,
for special remotes that do not support renameExport.)
* When exporting a tree to the special remote, handle cases
where a single key is used by multiple files, and the key is not
present locally. In this case it currently fails to update
one of the files (and renames the annexobjects location to the other
one). It will need to download the content from the special remote and
send it back to it.
* When the special remote does not support renameExport, the proxy will need to
download from the annexobjects location in order to store to the export
location.
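
A minimal standalone sketch of how the rules in the list above compose.
The types here (`Key`, `Tree`, `ProxyState`, `Request`) are simplified
placeholders invented for the sketch, not git-annex's real types; only the
ordering of decisions is meant to mirror the design.

    module ProxyExportSketch where

    newtype Key = Key String deriving (Eq, Show)
    newtype Tree = Tree [Key] deriving Show

    data Request
      = Put Key                      -- content uploaded via the proxy
      | Remove Key                   -- content dropped via the proxy
      | ReceivedTrackingBranch Tree  -- annex-tracking-branch pushed
      deriving Show

    data ProxyState = ProxyState
      { pendingTree :: Maybe Tree  -- tree pushed but not yet fully stored
      , exportedTree :: Maybe Tree -- tree currently exported
      , presentKeys :: [Key]       -- keys stored in .git/annex/objects/ on the remote
      } deriving Show

    -- One step of the proxy's handling of a request against an
    -- exporttree=yes annexobjects=yes special remote.
    step :: ProxyState -> Request -> (ProxyState, [String])
    step st req = case req of
      Put k ->
        -- Always store under .git/annex/objects/; never touch the export
        -- directly. Then see if that completed a pending export.
        maybeExport st { presentKeys = k : presentKeys st }
      Remove k
        | k `inTree` exportedTree st ->
            (st, ["refused: key is part of the exported tree"])
        | otherwise ->
            ( st { presentKeys = filter (/= k) (presentKeys st) }
            , ["removed from annexobjects location"] )
      ReceivedTrackingBranch t ->
        -- post-receive: try to export the new tree right away; it will
        -- often be incomplete, since the branch arrives before the content.
        maybeExport st { pendingTree = Just t }
      where
        inTree k = maybe False (\(Tree ks) -> k `elem` ks)

    -- Export the pending tree once the last key it needs is present.
    maybeExport :: ProxyState -> (ProxyState, [String])
    maybeExport st = case pendingTree st of
      Just t@(Tree ks)
        | all (`elem` presentKeys st) ks ->
            ( st { pendingTree = Nothing, exportedTree = Just t }
            , ["export tree to the special remote"] )
      _ -> (st, [])

The point of the sketch is only the ordering: PUT never updates the export
directly, REMOVE is refused for keys in the exported tree, and the tree is
(re)exported once the last missing key arrives.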
## possible enhancement: indirect uploads ## possible enhancement: indirect uploads

View file

@@ -14,8 +14,7 @@ Normally files are stored on a git-annex special remote named by their
keys. That is great for reliable data storage, but your filenames are keys. That is great for reliable data storage, but your filenames are
obscured. Exporting replicates the tree to the special remote as-is. obscured. Exporting replicates the tree to the special remote as-is.
Mixing key/value storage and exports in the same remote would be a mess and To use this, you have to configure a special remote with
so is not allowed. You have to configure a special remote with
`exporttree=yes` when initially setting it up with `exporttree=yes` when initially setting it up with
[[git-annex-initremote]](1). [[git-annex-initremote]](1).
@@ -78,6 +77,20 @@ so the overwritten modification is not lost.)
Specify the special remote to export to. Specify the special remote to export to.
* `--from=remote`
When the content of a file is not available in the local repository,
this option lets it be downloaded from another remote, and sent on to the
destination remote. The file will be temporarily stored on local disk,
but will never enter the local repository.
This option can be repeated multiple times.
It is possible to use --from with the same remote as --to. If the tree
contains several files with the same content, and the remote being
exported to already contains one copy of the content, this allows making
a copy by downloading the content from it.
* `--tracking` * `--tracking`
This is a deprecated way to set "remote.<name>.annex-tracking-branch". This is a deprecated way to set "remote.<name>.annex-tracking-branch".

View file

@@ -17,6 +17,11 @@ for repositories that have an adjusted branch checked
out. The hook updates the work tree when run in such a repository, out. The hook updates the work tree when run in such a repository,
the same as running `git-annex merge` would. the same as running `git-annex merge` would.
When a repository is configured to proxy to a special remote with
exporttree=yes, and the configured remote.<name>.annex-tracking-branch
is received, the hook handles updating the tree exported to the
special remote.
# OPTIONS # OPTIONS
* The [[git-annex-common-options]](1) can be used. * The [[git-annex-common-options]](1) can be used.
@@ -29,6 +34,8 @@ the same as running `git-annex merge` would.
[[git-annex-merge]](1) [[git-annex-merge]](1)
[[git-annex-export]](1)
# AUTHOR # AUTHOR
Joey Hess <id@joeyh.name> Joey Hess <id@joeyh.name>

View file

@@ -92,8 +92,8 @@ See [[git-annex-preferred-content]](1).
This option can be repeated multiple times with different paths. This option can be repeated multiple times with different paths.
Note that this option is ignored when syncing with "exporttree=yes" Note that this option does not prevent exporting other files to an
remotes. "exporttree=yes" remote.
* `--all` `-A` * `--all` `-A`

View file

@@ -37,8 +37,8 @@ do so by using eg `approxlackingcopies=1`.
This option can be repeated multiple times with different paths. This option can be repeated multiple times with different paths.
Note that this option is ignored when syncing with "exporttree=yes" Note that this option does not prevent exporting other files to an
remotes. "exporttree=yes" remote.
* `--jobs=N` `-JN` * `--jobs=N` `-JN`

View file

@@ -28,6 +28,14 @@ a proxy.
Proxies can only be accessed via ssh or by an annex+http url. Proxies can only be accessed via ssh or by an annex+http url.
To set up proxying to a special remote that is configured with
exporttree=yes, it's necessary for it to also be configured with
annexobjects=yes. And, "remote.<name>.annex-tracking-branch" needs to
be configured to the branch that will be exported to the special remote.
When that branch is pushed to the proxy, it will update the tree exported
to the special remote. When files are copied to the remote via the proxy,
it will also update the exported tree.
# OPTIONS # OPTIONS
* The [[git-annex-common-options]](1) can be used. * The [[git-annex-common-options]](1) can be used.
@@ -36,6 +44,7 @@ Proxies can only be accessed via ssh or by an annex+http url.
* [[git-annex]](1) * [[git-annex]](1)
* [[git-annex-updatecluster]](1) * [[git-annex-updatecluster]](1)
* [[git-annex-export]](1)
# AUTHOR # AUTHOR

View file

@@ -351,7 +351,6 @@ content from the key-value store.
See [[git-annex-extendcluster]](1) for details. See [[git-annex-extendcluster]](1) for details.
* `updateproxy` * `updateproxy`
Update records with proxy configuration. Update records with proxy configuration.

View file

@@ -125,6 +125,11 @@ the S3 remote.
When versioning is not enabled, this risks data loss, and so git-annex When versioning is not enabled, this risks data loss, and so git-annex
will not let you enable a remote with that configuration unless forced. will not let you enable a remote with that configuration unless forced.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
remote.
* `publicurl` - Configure the URL that is used to download files * `publicurl` - Configure the URL that is used to download files
from the bucket. Using this with a S3 bucket that has been configured from the bucket. Using this with a S3 bucket that has been configured
to allow anyone to download its content allows git-annex to download to allow anyone to download its content allows git-annex to download

View file

@@ -32,6 +32,11 @@ the adb remote.
by [[git-annex-import]]. When set in combination with exporttree, by [[git-annex-import]]. When set in combination with exporttree,
this lets files be imported from it, and changes exported back to it. this lets files be imported from it, and changes exported back to it.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
remote.
* `oldandroid` - Set to "yes" if your Android device is too old * `oldandroid` - Set to "yes" if your Android device is too old
to support `find -printf`. Enabling this will make importing slower. to support `find -printf`. Enabling this will make importing slower.
If you see an error like "bad arg '-printf'", you can enable this If you see an error like "bad arg '-printf'", you can enable this

View file

@@ -41,6 +41,11 @@ remote:
by [[git-annex-import]]. It will not be usable as a general-purpose by [[git-annex-import]]. It will not be usable as a general-purpose
special remote. special remote.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
directory.
* `ignoreinodes` - Usually when importing, the inode numbers * `ignoreinodes` - Usually when importing, the inode numbers
of files are used to detect when files have changed. Since some of files are used to detect when files have changed. Since some
filesystems generate new inode numbers each time they are mounted, filesystems generate new inode numbers each time they are mounted,

View file

@@ -32,6 +32,9 @@ for a list of known working combinations.
Setting this does not allow trees to be exported to the httpalso remote, Setting this does not allow trees to be exported to the httpalso remote,
because it's read-only. But it does let exported files be downloaded because it's read-only. But it does let exported files be downloaded
from it. from it.
* `annexobjects` - If the other special remote has `annexobjects=yes`
set (along with `exporttree=yes`), it also needs to be set when
initializing the httpalso remote.
Configuration of encryption and chunking is inherited from the other Configuration of encryption and chunking is inherited from the other
special remote, and does not need to be specified when initializing the special remote, and does not need to be specified when initializing the

View file

@@ -26,6 +26,11 @@ These parameters can be passed to `git annex initremote` to configure rsync:
by [[git-annex-export]]. It will not be usable as a general-purpose by [[git-annex-export]]. It will not be usable as a general-purpose
special remote. special remote.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
remote.
* `shellescape` - Optional. This has no effect when using rsync 3.2.4 or * `shellescape` - Optional. This has no effect when using rsync 3.2.4 or
newer. Set to "no" to avoid shell escaping newer. Set to "no" to avoid shell escaping
normally done when using older versions of rsync over ssh. That escaping normally done when using older versions of rsync over ssh. That escaping

View file

@@ -33,6 +33,11 @@ the webdav remote.
by [[git-annex-export]]. It will not be usable as a general-purpose by [[git-annex-export]]. It will not be usable as a general-purpose
special remote. special remote.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
remote.
* `chunk` - Enables [[chunking]] when storing large files. * `chunk` - Enables [[chunking]] when storing large files.
* `chunksize` - Deprecated version of chunk parameter above. * `chunksize` - Deprecated version of chunk parameter above.

View file

@@ -16,9 +16,6 @@ keys, in order to support exporttree=yes remotes.
Another place this would be useful is Another place this would be useful is
[[proxying to exporttree=yes special remotes|design/passthrough_proxy]]. [[proxying to exporttree=yes special remotes|design/passthrough_proxy]].
This could also solve [[todo/export_paired_rename_innefficenctcy]]
cleanly.
With this change, a user could just `git-annex copy --to remote` With this change, a user could just `git-annex copy --to remote`
and copy whatever files they want into it. Then later and copy whatever files they want into it. Then later
`git-annex export master --to remote` would efficiently update the tree `git-annex export master --to remote` would efficiently update the tree
@@ -52,6 +49,13 @@ surprising for an existing user!
Perhaps this should not be "exporttree=yes", but something else. Perhaps this should not be "exporttree=yes", but something else.
> Currently, if a remote is configured with "exporttree=foo", that
> is treated the same as "exporttree=no". So this will need to be
> a config added to exporttree=yes in order to interoperate
> with old git-annex.
>
> Call it "exporttree=yes annexobjects=yes" --[[Joey]]
---- ----
Consider two repositories A and B that both have access to the same Consider two repositories A and B that both have access to the same
@@ -60,16 +64,58 @@ exporttree=yes special remote R.
* A exports tree T1 to R * A exports tree T1 to R
* B pulls from A, so knows R has tree T1 * B pulls from A, so knows R has tree T1
* A exports tree T2 to R, which deletes file `foo`. So * A exports tree T2 to R, which deletes file `foo`. So
it is moved to R's .git/annex/objects/ it is moved to R's .git/annex/objects. Or, alternatively,
`foo` is deleted, and the key is then copied to R again,
also to .git/annex/objects.
* B exports tree T2 to R also. So B deletes file `foo`. But it was not * B exports tree T2 to R also. So B deletes file `foo`. But it was not
present anyway. If B then marks the key as not present in R, we will have present anyway. If B then marks the key as not present in R, we will have
lost track of the fact that A moved it to the objects location. lost track of the fact that A moved it to the objects location.
So, when calling removeExport, have to also check if the key is present in So, when calling removeExport, have to also check if the key is present in
the objects location. If so, don't record the key as missing. (Of course, the objects location. If so, either don't record the key as missing, or
it already checks if some other exported file also has the content of the also remove from the objects location.
key.)
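
A schematic sketch of that check, parameterized over the remote's
operations so it stands alone; the parameter names mirror
removeExport/checkPresentExport and the annexobjects location, but the
"record key as gone" step is a made-up placeholder, not git-annex's real
location-log code.

    removeExportLocation
      :: Monad m
      => (key -> loc -> m ())    -- removeExport
      -> (key -> loc -> m Bool)  -- checkPresentExport
      -> (key -> loc)            -- annexobjects location of a key
      -> (key -> m ())           -- record key as no longer present on the remote
      -> key -> loc -> m ()
    removeExportLocation remove checkpresent annexobjectloc recordgone k loc = do
      remove k loc
      -- Before recording the key as gone, see if it survives in the
      -- remote's .git/annex/objects/ location.
      stillthere <- checkpresent k (annexobjectloc k)
      if stillthere
        then return ()
        else recordgone k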
----
Could a remote with annexobjects=yes and exporttree=yes but without
importtree=yes avoid being forced to be untrusted?
If not, the retrieval from the annexobjects location needs to do strong
verification of the content.
If the annexobjects directory only gets keys uploaded to it, and never has
exported files renamed into it, its content will always be as expected, and
perhaps the remote does not need to be untrusted.
OTOH, if an exported file that is being deleted in an
updated export gets renamed into the annexobjects directory, it's possible
that the file has in fact been overwritten with other content (by git-annex
in another clone of the repository), and so the object in annexobjects
would not be as expected. So unfortunately, it seems that rename can't be
done without forcing untrusted.
Note that exporting a new tree can still delete any file at any time.
If the remote is not untrusted, that could violate numcopies.
So, performUnexport would need to check numcopies first, when using such a
remote.
Even if such a remote is not untrusted, an exported file can't be counted as a
copy. Only a file in the annexobjects location can be. So the remote's
checkPresent will perhaps need to return false for files that are exported?
But surely things other than numcopies use checkPresent. So this might need
a change to checkPresent's type to indicate the difference.
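
Purely as an illustration of the kind of type change being mused about
here, something like the following could distinguish the two cases. These
names are invented for the sketch and do not exist in git-annex.

    -- A checkPresent result that reports where content was found, so
    -- numcopies-style callers can ignore exported-only presence while
    -- other callers still treat any presence as present.
    data Presence
      = NotPresent
      | PresentInExport        -- only as an exported file; not countable as a copy
      | PresentInAnnexObjects  -- in .git/annex/objects/; countable as a copy
      deriving (Show, Eq)

    countsAsCopy :: Presence -> Bool
    countsAsCopy PresentInAnnexObjects = True
    countsAsCopy _ = False

    isPresent :: Presence -> Bool
    isPresent NotPresent = False
    isPresent _ = True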
Crazy idea: Split the remote into two uuids. Use one for
the annexobjects directory, and the other for the exported files. This
clean separation avoids the above problem. But it would be confusing for the
user. HOWEVER, what if the two were treated as parts of the same cluster...?
This may be worth revisiting later, but for now, I am leaning toward keeping it
untrusted, and following down that line to make it as performant as
possible.
--- ---
Implementing in the "exportreeplus" branch --[[Joey]] Implementing in the "exportreeplus" branch --[[Joey]]
> [[done]] --[[Joey]]

View file

@@ -31,7 +31,30 @@ Planned schedule of work:
## work notes ## work notes
* Working on `exportreeplus` branch which is groundwork for proxying to * Working on `exportreeplus` branch which is groundwork for proxying to
exporttree=yes special remotes. exporttree=yes special remotes. Need to merge it to master.
## completed items for August
* Special remotes configured with exporttree=yes annexobjects=yes
can store objects in .git/annex/objects, as well as an exported tree.
* Support proxying to special remotes configured with
exporttree=yes annexobjects=yes.
* post-receive: When proxying is enabled for an exporttree=yes
special remote and the configured remote.name.annex-tracking-branch
is received, the tree is exported to the special remote.
* When getting from a P2P HTTP remote, prompt for credentials when
required, instead of failing.
* Prevent `updateproxy` and `updatecluster` from adding
an exporttree=yes special remote that does not have
annexobjects=yes, to avoid foot shooting.
* Implement `git-annex export treeish --to=foo --from=bar`, which
gets from bar as needed to send to foo. Make post-receive use
`--to=r --from=r` to handle the multiple files case.
## items deferred until later for p2p protocol over http ## items deferred until later for p2p protocol over http