initial support for annexobjects=yes

Works but some commands may need changes to support special remotes
configured this way.
This commit is contained in:
Joey Hess 2024-08-02 14:07:45 -04:00
parent 169fd414eb
commit 28b29f63dc
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
14 changed files with 163 additions and 41 deletions

View file

@ -27,6 +27,7 @@ module Annex.Locations (
gitAnnexInodeSentinalCache,
annexLocationsBare,
annexLocationsNonBare,
annexLocation,
gitAnnexDir,
gitAnnexObjectDir,
gitAnnexTmpOtherDir,

View file

@ -100,6 +100,12 @@ exportTree = fromMaybe False . getRemoteConfigValue exportTreeField
{- Value of the importTreeField setting of a remote's parsed config;
 - False when the field has no value. -}
importTree :: ParsedRemoteConfig -> Bool
importTree c = fromMaybe False (getRemoteConfigValue importTreeField c)
{- Config field that lets other objects be stored in a remote along
 - with exported trees. -}
annexObjectsField :: RemoteConfigField
annexObjectsField = Accepted "annexobjects"

{- Value of the annexobjects setting of a remote's parsed config;
 - False when the field has no value. -}
annexObjects :: ParsedRemoteConfig -> Bool
annexObjects c = fromMaybe False (getRemoteConfigValue annexObjectsField c)
{- Parsers for fields that are common to all special remotes. -}
commonFieldParsers :: [RemoteConfigFieldParser]
commonFieldParsers =
@ -124,6 +130,8 @@ essentialFieldParsers =
(FieldDesc "export trees of files to this remote")
, yesNoParser importTreeField (Just False)
(FieldDesc "import trees of files from this remote")
, yesNoParser annexObjectsField (Just False)
(FieldDesc "store other objects in remote along with exported trees")
]
autoEnableFieldParser :: RemoteConfigFieldParser

View file

@ -1,6 +1,10 @@
git-annex (10.20240732) UNRELEASED; urgency=medium
* Avoid loading cluster log at startup.
* Remotes configured with exporttree=yes annexobjects=yes
can store objects in .git/annex/objects, as well as an exported tree.
* git-remote-annex: Store objects in exporttree=yes special remotes
in the same paths used by annexobjects=yes.
-- Joey Hess <id@joeyh.name> Wed, 31 Jul 2024 15:52:03 -0400

View file

@ -1,30 +1,35 @@
{- Helper to make remotes support export and import (or not).
-
- Copyright 2017-2019 Joey Hess <id@joeyh.name>
- Copyright 2017-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE FlexibleInstances, FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module Remote.Helper.ExportImport where
import Annex.Common
import qualified Annex
import Types.Remote
import Types.Key
import Types.ProposedAccepted
import Annex.Verify
import Types.Export
import Remote.Helper.Encryptable (encryptionIsEnabled)
import qualified Database.Export as Export
import qualified Database.ContentIdentifier as ContentIdentifier
import Annex.Export
import Annex.LockFile
import Annex.SpecialRemote.Config
import Annex.Verify
import Annex.Content
import Git.Types (fromRef)
import Logs.Export
import Logs.ContentIdentifier (recordContentIdentifier)
import Control.Concurrent.STM
import qualified System.FilePath.ByteString as P
-- | Use for remotes that do not support exports.
class HasExportUnsupported a where
@ -123,12 +128,15 @@ adjustExportImport r rs = do
else importUnsupported
}
}
let annexobjects = isexport && annexObjects (config r)
if not isexport && not isimport
then return r'
else adjustExportImport' isexport isimport r' rs
else do
gc <- Annex.getGitConfig
adjustExportImport' isexport isimport annexobjects r' rs gc
adjustExportImport' :: Bool -> Bool -> Remote -> RemoteStateHandle -> Annex Remote
adjustExportImport' isexport isimport r rs = do
adjustExportImport' :: Bool -> Bool -> Bool -> Remote -> RemoteStateHandle -> GitConfig -> Annex Remote
adjustExportImport' isexport isimport annexobjects r rs gc = do
dbv <- prepdbv
ciddbv <- prepciddb
let versioned = versionedExport (exportActions r)
@ -141,43 +149,49 @@ adjustExportImport' isexport isimport r rs = do
, importActions = if isimport
then importActions r
else importUnsupported
, storeKey = \k af p ->
-- Storing a key on an export could be implemented,
-- but it would perform unnecessary work
, storeKey = \k af o p ->
-- Storing a key to an export location could be
-- implemented, but it would perform unnecessary work
-- when another repository has already stored the
-- key, and the local repository does not know
-- about it. To avoid unnecessary costs, don't do it.
if thirdpartypopulated
then giveup "remote is not populated by git-annex"
else if isexport
then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
then if annexobjects
then storeannexobject k o p
else giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
else if isimport
then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it"
else storeKey r k af p
, removeKey = \k ->
-- Removing a key from an export would need to
-- change the tree in the export log to not include
else storeKey r k af o p
, removeKey = \proof k ->
-- Removing a key from an export location would need
-- to change the tree in the export log to not include
-- the file. Otherwise, conflicts when removing
-- files would not be dealt with correctly.
-- There does not seem to be a good use case for
-- removing a key from an export in any case.
-- removing a key from an exported tree in any case.
if thirdpartypopulated
then giveup "dropping content from this remote is not supported"
else if isexport
then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
then if annexobjects
then removeannexobject k
else giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
else if isimport
then giveup "dropping content from this remote is not supported because it is configured with importtree=yes"
else removeKey r k
else removeKey r proof k
, lockContent = if versioned
then lockContent r
else Nothing
, retrieveKeyFile = \k af dest p vc ->
if isimport
then supportversionedretrieve k af dest p vc $
retrieveKeyFileFromImport dbv ciddbv k af dest p
supportretrieveannexobject dbv k dest p $
retrieveKeyFileFromImport dbv ciddbv k af dest p
else if isexport
then supportversionedretrieve k af dest p vc $
retrieveKeyFileFromExport dbv k af dest p
supportretrieveannexobject dbv k dest p $
retrieveKeyFileFromExport dbv k af dest p
else retrieveKeyFile r k af dest p vc
, retrieveKeyFileCheap = if versioned
then retrieveKeyFileCheap r
@ -185,8 +199,9 @@ adjustExportImport' isexport isimport r rs = do
, checkPresent = \k -> if versioned
then checkPresent r k
else if isimport
then anyM (checkPresentImport ciddbv k)
=<< getanyexportlocs dbv k
then checkpresentwith k $
anyM (checkPresentImport ciddbv k)
=<< getanyexportlocs dbv k
else if isexport
-- Check if any of the files a key
-- was exported to are present. This
@ -197,8 +212,9 @@ adjustExportImport' isexport isimport r rs = do
-- to it. Remotes that have such
-- problems are made untrusted,
-- so it's not worried about here.
then anyM (checkPresentExport (exportActions r) k)
=<< getanyexportlocs dbv k
then checkpresentwith k $
anyM (checkPresentExport (exportActions r) k)
=<< getanyexportlocs dbv k
else checkPresent r k
-- checkPresent from an export is more expensive
-- than otherwise, so not cheap. Also, this
@ -226,7 +242,13 @@ adjustExportImport' isexport isimport r rs = do
then do
ts <- map fromRef . exportedTreeishes
<$> getExport (uuid r)
return (is++[("exporttree", "yes"), ("exportedtree", unwords ts)])
return $ is ++ catMaybes
[ Just ("exporttree", "yes")
, Just ("exportedtree", unwords ts)
, if annexobjects
then Just ("annexobjects", "yes")
else Nothing
]
else return is
return $ if isimport && not thirdpartypopulated
then (is'++[("importtree", "yes")])
@ -313,7 +335,7 @@ adjustExportImport' isexport isimport r rs = do
, liftIO $ atomically (readTMVar dbv)
)
getexportinconflict (_, _, v) = v
isexportinconflict (_, _, v) = liftIO $ atomically $ readTVar v
updateexportdb db exportinconflict =
Export.updateExportTreeFromLog db >>= \case
@ -329,8 +351,8 @@ adjustExportImport' isexport isimport r rs = do
getexportlocs dbv k = do
db <- getexportdb dbv
liftIO $ Export.getExportTree db k >>= \case
[] -> ifM (atomically $ readTVar $ getexportinconflict dbv)
liftIO (Export.getExportTree db k) >>= \case
[] -> ifM (isexportinconflict dbv)
( giveup "unknown export location, likely due to the export conflict"
, return []
)
@ -372,18 +394,71 @@ adjustExportImport' isexport isimport r rs = do
else if isexport
then retrieveKeyFileFromExport dbv k af dest p
else giveup "no content identifier is recorded, unable to retrieve"
-- versionedExport remotes have a key/value store, so can use
-- the usual retrieveKeyFile, rather than an import/export
-- variant. However, fall back to that if retrieveKeyFile fails.
supportversionedretrieve k af dest p vc a
| versionedExport (exportActions r) =
retrieveKeyFile r k af dest p vc
`catchNonAsync` const a
| otherwise = a
-- Runs the presence check a. When it reports that the key is not
-- present and annexobjects is enabled, falls back to checking the
-- key's annexobjects location; otherwise the result of a stands.
checkpresentwith k a = ifM a
( return True
, if annexobjects
then checkpresentannexobject k
else return False
)
-- Checks whether the key is present at the given location, using the
-- remote's import actions and the content identifiers that
-- getkeycids returns for the key.
checkPresentImport ciddbv k loc =
checkPresentExportWithContentIdentifier
(importActions r)
k loc
=<< getkeycids ciddbv k
-- For annexobjects=yes, objects are stored in the remote
-- in a location under .git/annex/objects/, using the
-- hashDirLower directory hashing.
annexobjectlocation k = mkExportLocation $
".git" P.</> annexLocation gc k hashDirLower
-- Checks, via the remote's export actions, whether the key is present
-- at its annexobjects location.
checkpresentannexobject k =
checkPresentExport (exportActions r) k (annexobjectlocation k)
-- Stores the key's content at its annexobjects location using the
-- remote's storeExport action. Gives up when prepSendAnnex' provides
-- no content to send.
storeannexobject k o p = prepSendAnnex' k o >>= \case
Nothing -> giveup "content is not available"
Just (src, _, checkmodified) -> do
let loc = annexobjectlocation k
storeExport (exportActions r) src k loc p
-- checkmodified is run only after the store has finished;
-- presumably it detects the source content changing while
-- it was being sent -- TODO confirm against prepSendAnnex'.
checkmodified >>= \case
Nothing -> return ()
Just err -> do
-- Do not leave the just-stored object on the
-- remote when the check reports an error.
removeExport (exportActions r) k loc
giveup err
-- Removes the key's object from its annexobjects location, via the
-- remote's removeExport action.
removeannexobject k =
removeExport (exportActions r) k (annexobjectlocation k)
-- Retrieves the key's object from its annexobjects location to dest,
-- via the remote's retrieveExport action.
retrieveannexobject k dest p =
retrieveExport (exportActions r) k (annexobjectlocation k) dest p
-- When annexobjects is enabled, tries the retrieval action a first,
-- and if it throws a non-async exception, tries retrieving from the
-- key's annexobjects location instead. When annexobjects is not
-- enabled, this is just a.
supportretrieveannexobject dbv k dest p a
| annexobjects = tryNonAsync a >>= \case
Right res -> return res
Left err -> tryNonAsync (retrieveannexobject k dest p) >>= \case
Right res -> return res
-- Both failed, so which exception to
-- throw? If there are known export
-- locations, throw the exception from
-- retrieving from the export locations.
-- If there are no known export locations,
-- throw the exception from retrieving from
-- the annexobjects location.
--
-- Exception to the above: with no known export
-- locations but an export conflict flagged, the
-- first exception (err) is thrown. Presumably
-- that is because the conflict may be the reason
-- no locations are known -- TODO confirm.
Left err' -> getanyexportlocs dbv k >>= \case
[] -> ifM (isexportinconflict dbv)
( throwM err
, throwM err'
)
_ -> throwM err
| otherwise = a
-- versionedExport remotes have a key/value store which
-- the usual retrieveKeyFile can be used with, rather than
-- an import/export variant. However, fall back to that
-- if retrieveKeyFile fails.
--
-- The fallback is the action a. The non-async exception thrown by
-- retrieveKeyFile is discarded rather than reported. For remotes
-- that are not versionedExport, a is run directly.
supportversionedretrieve k af dest p vc a
| versionedExport (exportActions r) =
retrieveKeyFile r k af dest p vc
`catchNonAsync` const a
| otherwise = a

View file

@ -14,8 +14,7 @@ Normally files are stored on a git-annex special remote named by their
keys. That is great for reliable data storage, but your filenames are
obscured. Exporting replicates the tree to the special remote as-is.
Mixing key/value storage and exports in the same remote would be a mess and
so is not allowed. You have to configure a special remote with
To use this, you have to configure a special remote with
`exporttree=yes` when initially setting it up with
[[git-annex-initremote]](1).

View file

@ -92,8 +92,8 @@ See [[git-annex-preferred-content]](1).
This option can be repeated multiple times with different paths.
Note that this option is ignored when syncing with "exporttree=yes"
remotes.
Note that this option does not prevent exporting other files to an
"exporttree=yes" remote.
* `--all` `-A`

View file

@ -37,8 +37,8 @@ do so by using eg `approxlackingcopies=1`.
This option can be repeated multiple times with different paths.
Note that this option is ignored when syncing with "exporttree=yes"
remotes.
Note that this option does not prevent exporting other files to an
"exporttree=yes" remote.
* `--jobs=N` `-JN`

View file

@ -125,6 +125,11 @@ the S3 remote.
When versioning is not enabled, this risks data loss, and so git-annex
will not let you enable a remote with that configuration unless forced.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
remote.
* `publicurl` - Configure the URL that is used to download files
from the bucket. Using this with a S3 bucket that has been configured
to allow anyone to download its content allows git-annex to download

View file

@ -32,6 +32,11 @@ the adb remote.
by [[git-annex-import]]. When set in combination with exporttree,
this lets files be imported from it, and changes exported back to it.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
remote.
* `oldandroid` - Set to "yes" if your Android device is too old
to support `find -printf`. Enabling this will make importing slower.
If you see an error like "bad arg '-printf'", you can enable this

View file

@ -41,6 +41,11 @@ remote:
by [[git-annex-import]]. It will not be usable as a general-purpose
special remote.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
directory.
* `ignoreinodes` - Usually when importing, the inode numbers
of files are used to detect when files have changed. Since some
filesystems generate new inode numbers each time they are mounted,

View file

@ -32,6 +32,9 @@ for a list of known working combinations.
Setting this does not allow trees to be exported to the httpalso remote,
because it's read-only. But it does let exported files be downloaded
from it.
* `annexobjects` - If the other special remote has `annexobjects=yes`
set (along with `exporttree=yes`), it also needs to be set when
initializing the httpalso remote.
Configuration of encryption and chunking is inherited from the other
special remote, and does not need to be specified when initializing the

View file

@ -26,6 +26,11 @@ These parameters can be passed to `git annex initremote` to configure rsync:
by [[git-annex-export]]. It will not be usable as a general-purpose
special remote.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
remote.
* `shellescape` - Optional. This has no effect when using rsync 3.2.4 or
newer. Set to "no" to avoid shell escaping
normally done when using older versions of rsync over ssh. That escaping

View file

@ -33,6 +33,11 @@ the webdav remote.
by [[git-annex-export]]. It will not be usable as a general-purpose
special remote.
* `annexobjects` - When set to "yes" along with "exporttree=yes",
this allows storing other objects in the remote along with the
exported tree. They will be stored under .git/annex/objects/ in the
remote.
* `chunk` - Enables [[chunking]] when storing large files.
* `chunksize` - Deprecated version of chunk parameter above.

View file

@ -52,6 +52,13 @@ surprising for an existing user!
Perhaps this should not be "exporttree=yes", but something else.
> Currently, if a remote is configured with "exporttree=foo", that
> is treated the same as "exporttree=no". So this will need to be
> a config added to exporttree=yes in order to interoperate
> with old git-annex.
>
> Call it "exporttree=yes annexobjects=yes" --[[Joey]]
----
Consider two repositories A and B that both have access to the same