Merge branch 'master' into borg

This commit is contained in:
Joey Hess 2020-12-17 16:36:15 -04:00
commit f0a495fa05
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 282 additions and 158 deletions

View file

@ -43,10 +43,15 @@ exportKey sha = mk <$> catKey sha
warnExportImportConflict :: Remote -> Annex ()
warnExportImportConflict r = do
ops <- Remote.isImportSupported r >>= return . \case
True -> "exported to and/or imported from"
False -> "exported to"
toplevelWarning True $
"Conflict detected. Different trees have been " ++ ops ++
Remote.name r ++
". Use git-annex export to resolve this conflict."
isimport <- Remote.isImportSupported r
isexport <- Remote.isExportSupported r
let (ops, resolvcmd) = case (isexport, isimport) of
(False, True) -> ("imported from", "git-annex import")
(True, False) -> ("exported to", "git-annex export")
_ -> ("exported to and/or imported from", "git-annex export")
toplevelWarning True $ unwords
[ "Conflict detected. Different trees have been"
, ops, Remote.name r ++ ". Use"
, resolvcmd
, "to resolve this conflict."
]

View file

@ -66,11 +66,12 @@ trustMapLoad :: Annex TrustMap
trustMapLoad = do
overrides <- Annex.getState Annex.forcetrust
l <- remoteList
-- Exports are not trusted, since they are not key/value stores.
-- This does not apply to appendonly exports, which are key/value
-- stores.
-- Export/import remotes are not trusted, since they are not
-- key/value stores. (Unless they are appendonly remotes.)
let isexportimport r = Types.Remote.isExportSupported r
<||> Types.Remote.isImportSupported r
let untrustworthy r = pure (not (Types.Remote.appendonly r))
<&&> Types.Remote.isExportSupported r
<&&> isexportimport r
exports <- filterM untrustworthy l
let exportoverrides = M.fromList $
map (\r -> (Types.Remote.uuid r, UnTrusted)) exports

View file

@ -101,147 +101,142 @@ adjustExportImportRemoteType rt = rt { setup = setup' }
-- | Adjust a remote to support exporttree=yes and/or importree=yes.
adjustExportImport :: Remote -> RemoteStateHandle -> Annex Remote
adjustExportImport rmt rs = do
adjustExportImport r rs = do
isexport <- pure (exportTree (config r)) <&&> isExportSupported r
isimport <- pure (importTree (config r)) <&&> isImportSupported r
let r' = r
{ remotetype = (remotetype r)
{ exportSupported = if isexport
then exportSupported (remotetype r)
else exportUnsupported
, importSupported = if isimport
then importSupported (remotetype r)
else importUnsupported
}
}
if not isexport && not isimport
then return r'
else adjustExportImport' isexport isimport r' rs
adjustExportImport' :: Bool -> Bool -> Remote -> RemoteStateHandle -> Annex Remote
adjustExportImport' isexport isimport r rs = do
dbv <- prepdbv
case (exportTree (config rmt), importTree (config rmt)) of
(True, True) -> isimport dbv =<< isexport dbv rmt
(True, False) -> notimport =<< isexport dbv rmt
(False, True) -> notexport =<< isimport dbv rmt
(False, False) -> notimport =<< notexport rmt
where
notexport r = return $ r
{ exportActions = exportUnsupported
, remotetype = (remotetype r)
{ exportSupported = exportUnsupported
}
}
notimport r = return $ r
{ importActions = importUnsupported
, remotetype = (remotetype r)
{ importSupported = importUnsupported
}
}
isimport dbv r = do
ciddbv <- prepciddb
let keycids k = do
db <- getciddb ciddbv
liftIO $ ContentIdentifier.getContentIdentifiers db rs k
let checkpresent k loc =
checkPresentExportWithContentIdentifier
(importActions r)
k loc
=<< keycids k
return $ r
{ exportActions = (exportActions r)
{ storeExport = \f k loc p -> do
db <- getciddb ciddbv
exportdb <- getexportdb r dbv
oldks <- liftIO $ Export.getExportTreeKey exportdb loc
oldcids <- liftIO $ concat
<$> mapM (ContentIdentifier.getContentIdentifiers db rs) oldks
newcid <- storeExportWithContentIdentifier (importActions r) f k loc oldcids p
withExclusiveLock gitAnnexContentIdentifierLock $ do
liftIO $ ContentIdentifier.recordContentIdentifier db rs newcid k
liftIO $ ContentIdentifier.flushDbQueue db
recordContentIdentifier rs newcid k
, removeExport = \k loc ->
removeExportWithContentIdentifier (importActions r) k loc
=<< keycids k
, removeExportDirectory = removeExportDirectoryWhenEmpty (importActions r)
-- renameExport is optional, and the
-- remote's implementation may
-- lose modifications to the file
-- (by eg copying and then deleting)
-- so don't use it
, renameExport = \_ _ _ -> return Nothing
, checkPresentExport = checkpresent
}
, checkPresent = if appendonly r
then checkPresent r
else \k -> anyM (checkpresent k)
=<< getexportlocs r dbv k
, getInfo = do
is <- getInfo r
return (is++[("import", "yes")])
}
isexport dbv r = ifM (isExportSupported r)
( isexport' dbv r
, notexport r
)
isexport' dbv r = return $ r
-- Storing a key on an export could be implemented,
-- but it would perform unnecessary work
-- when another repository has already stored the
-- key, and the local repository does not know
-- about it. To avoid unnecessary costs, don't do it.
{ storeKey = \_ _ _ ->
giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
-- Keys can be retrieved using retrieveExport,
-- but since that retrieves from a path in the
-- remote that another writer could have replaced
-- with content not of the requested key,
-- the content has to be strongly verified.
--
-- appendonly remotes have a key/value store,
-- so don't need to use retrieveExport. However,
-- fall back to it if retrieveKeyFile fails.
, retrieveKeyFile = \k af dest p ->
let retrieveexport = retrieveKeyFileFromExport r dbv k af dest p
in if appendonly r
then retrieveKeyFile r k af dest p
`catchNonAsync` const retrieveexport
else retrieveexport
, retrieveKeyFileCheap = if appendonly r
then retrieveKeyFileCheap r
else Nothing
-- Removing a key from an export would need to
-- change the tree in the export log to not include
-- the file. Otherwise, conflicts when removing
-- files would not be dealt with correctly.
-- There does not seem to be a good use case for
-- removing a key from an export in any case.
, removeKey = \_k -> giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
-- Can't lock content on exports, since they're
-- not key/value stores, and someone else could
-- change what's exported to a file at any time.
--
-- (except for appendonly remotes)
, lockContent = if appendonly r
ciddbv <- prepciddb
let normal = not isexport && not isimport
let iskeyvaluestore = normal || appendonly r
return $ r
{ exportActions = if isexport
then if isimport
then exportActionsForImport dbv ciddbv (exportActions r)
else exportActions r
else exportUnsupported
, importActions = if isimport
then importActions r
else importUnsupported
, storeKey = \k af p ->
-- Storing a key on an export could be implemented,
-- but it would perform unnecessary work
-- when another repository has already stored the
-- key, and the local repository does not know
-- about it. To avoid unnecessary costs, don't do it.
if isexport
then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it"
else if isimport
then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it"
else storeKey r k af p
, removeKey = \k ->
-- Removing a key from an export would need to
-- change the tree in the export log to not include
-- the file. Otherwise, conflicts when removing
-- files would not be dealt with correctly.
-- There does not seem to be a good use case for
-- removing a key from an export in any case.
if isexport
then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove"
else if isimport
then giveup "dropping content from this remote is not supported because it is configured with importtree=yes"
else removeKey r k
, lockContent = if iskeyvaluestore
then lockContent r
else Nothing
-- Check if any of the files a key was exported to
-- are present. This doesn't guarantee the export
-- contains the right content, which is why export
-- remotes are untrusted.
--
-- (but appendonly remotes work the same as any
-- non-export remote)
, checkPresent = if appendonly r
then checkPresent r
else \k -> anyM (checkPresentExport (exportActions r) k)
=<< getexportlocs r dbv k
, retrieveKeyFile = \k af dest p ->
if isimport
then supportappendonlyretrieve k af dest p $
retrieveKeyFileFromImport dbv ciddbv k af dest p
else if isexport
then supportappendonlyretrieve k af dest p $
retrieveKeyFileFromExport dbv k af dest p
else retrieveKeyFile r k af dest p
, retrieveKeyFileCheap = if iskeyvaluestore
then retrieveKeyFileCheap r
else Nothing
, checkPresent = \k -> if appendonly r
then checkPresent r k
else if isimport
then anyM (checkPresentImport ciddbv k)
=<< getexportlocs dbv k
else if isexport
-- Check if any of the files a key
-- was exported to are present. This
-- doesn't guarantee the export
-- contains the right content,
-- which is why export remotes
-- are untrusted.
then anyM (checkPresentExport (exportActions r) k)
=<< getexportlocs dbv k
else checkPresent r k
-- checkPresent from an export is more expensive
-- than otherwise, so not cheap. Also, this
-- avoids things that look at checkPresentCheap and
-- silently skip non-present files from behaving
-- in confusing ways when there's an export
-- conflict.
, checkPresentCheap = False
, mkUnavailable = return Nothing
-- conflict (or an import conflict).
, checkPresentCheap = if normal
then checkPresentCheap r
else False
-- git-annex testremote cannot be used to test
-- import/export since it stores keys.
, mkUnavailable = if normal
then mkUnavailable r
else return Nothing
, getInfo = do
ts <- map fromRef . exportedTreeishes
<$> getExport (uuid r)
is <- getInfo r
return (is++[("export", "yes"), ("exportedtree", unwords ts)])
is' <- if isexport
then do
ts <- map fromRef . exportedTreeishes
<$> getExport (uuid r)
return (is++[("export", "yes"), ("exportedtree", unwords ts)])
else return is
return $ if isimport
then (is'++[("import", "yes")])
else is'
}
where
-- exportActions adjusted to use the equivalent import actions,
-- which take ContentIdentifiers into account.
exportActionsForImport dbv ciddbv ea = ea
{ storeExport = \f k loc p -> do
db <- getciddb ciddbv
exportdb <- getexportdb dbv
oldks <- liftIO $ Export.getExportTreeKey exportdb loc
oldcids <- liftIO $ concat
<$> mapM (ContentIdentifier.getContentIdentifiers db rs) oldks
newcid <- storeExportWithContentIdentifier (importActions r) f k loc oldcids p
withExclusiveLock gitAnnexContentIdentifierLock $ do
liftIO $ ContentIdentifier.recordContentIdentifier db rs newcid k
liftIO $ ContentIdentifier.flushDbQueue db
recordContentIdentifier rs newcid k
, removeExport = \k loc ->
removeExportWithContentIdentifier (importActions r) k loc
=<< getkeycids ciddbv k
, removeExportDirectory = removeExportDirectoryWhenEmpty (importActions r)
-- renameExport is optional, and the remote's
-- implementation may lose modifications to the file
-- (by eg copying and then deleting) so don't use it
, renameExport = \_ _ _ -> return Nothing
, checkPresentExport = checkPresentImport ciddbv
}
prepciddb = do
lcklckv <- liftIO newEmptyTMVarIO
dbtv <- liftIO newEmptyTMVarIO
@ -277,14 +272,14 @@ adjustExportImport rmt rs = do
-- After opening the database, check if the export log is
-- different than the database, and update the database, to notice
-- when an export has been updated from another repository.
getexportdb r (dbv, lcklckv, exportinconflict) =
getexportdb (dbv, lcklckv, exportinconflict) =
liftIO (atomically (tryReadTMVar dbv)) >>= \case
Just db -> return db
-- let only one thread take the lock
Nothing -> ifM (liftIO $ atomically $ tryPutTMVar lcklckv ())
( do
db <- Export.openDb (uuid r)
updateexportdb db exportinconflict r
updateexportdb db exportinconflict
liftIO $ atomically $ putTMVar dbv db
return db
-- loser waits for winner to open the db and
@ -294,7 +289,7 @@ adjustExportImport rmt rs = do
getexportinconflict (_, _, v) = v
updateexportdb db exportinconflict r =
updateexportdb db exportinconflict =
Export.updateExportTreeFromLog db >>= \case
Export.ExportUpdateSuccess -> return ()
Export.ExportUpdateConflict -> do
@ -302,20 +297,56 @@ adjustExportImport rmt rs = do
liftIO $ atomically $
writeTVar exportinconflict True
getexportlocs r dbv k = do
db <- getexportdb r dbv
getexportlocs dbv k = do
db <- getexportdb dbv
liftIO $ Export.getExportTree db k
getfirstexportloc dbv k = do
db <- getexportdb dbv
liftIO $ Export.getExportTree db k >>= \case
[] -> ifM (atomically $ readTVar $ getexportinconflict dbv)
( giveup "unknown export location, likely due to the export conflict"
, giveup "unknown export location"
)
(l:_) -> return l
getkeycids ciddbv k = do
db <- getciddb ciddbv
liftIO $ ContentIdentifier.getContentIdentifiers db rs k
retrieveKeyFileFromExport r dbv k _af dest p = ifM (isVerifiable k)
-- Keys can be retrieved using retrieveExport, but since that
-- retrieves from a path in the remote that another writer could
-- have replaced with content not of the requested key, the content
-- has to be strongly verified.
retrieveKeyFileFromExport dbv k _af dest p = ifM (isVerifiable k)
( do
locs <- getexportlocs r dbv k
case locs of
[] -> ifM (liftIO $ atomically $ readTVar $ getexportinconflict dbv)
( giveup "unknown export location, likely due to the export conflict"
, giveup "unknown export location"
)
(l:_) -> do
retrieveExport (exportActions r) k l dest p
return UnVerified
l <- getfirstexportloc dbv k
retrieveExport (exportActions r) k l dest p
return MustVerify
, giveup $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ " backend"
)
retrieveKeyFileFromImport dbv ciddbv k af dest p =
getkeycids ciddbv k >>= \case
(cid:_) -> do
l <- getfirstexportloc dbv k
void $ retrieveExportWithContentIdentifier (importActions r) l cid dest (pure k) p
return UnVerified
-- In case a content identifier is somehow missing,
-- try this instead.
[] -> retrieveKeyFileFromExport dbv k af dest p
-- appendonly remotes have a key/value store, so can use
-- the usual retrieveKeyFile, rather than an import/export
-- variant. However, fall back to that if retrieveKeyFile fails.
supportappendonlyretrieve k af dest p a
| appendonly r =
retrieveKeyFile r k af dest p
`catchNonAsync` const a
| otherwise = a
checkPresentImport ciddbv k loc =
checkPresentExportWithContentIdentifier
(importActions r)
k loc
=<< getkeycids ciddbv k

View file

@ -63,9 +63,9 @@ data RemoteTypeA a = RemoteType
, configParser :: RemoteConfig -> a RemoteConfigParser
-- initializes or enables a remote
, setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID)
-- check if a remote of this type is able to support export
-- check if a remote of this type is able to support export of trees
, exportSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool
-- check if a remote of this type is able to support import
-- check if a remote of this type is able to support import of trees
, importSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool
}

View file

@ -0,0 +1,30 @@
[[!comment format=mdwn
username="kyle"
avatar="http://cdn.libravatar.org/avatar/7d6e85cde1422ad60607c87fa87c63f3"
subject="comment 3"
date="2020-12-17T17:41:35Z"
content="""
> Is this a case for an rsync remote? (I haven't really figured out
> special remotes yet.) Or is there a typical workflow on the git annex
> side that I could be using to fix this (like `import` rather than
> `add`)?
I think conceptually that's a good fit. You could set
`importtree=yes` with the special remote and ingest changes with `git
annex import` on beta's side. However, the rsync special remote
doesn't support `importtree` yet.
https://git-annex.branchable.com/todo/import_tree_from_rsync_special_remote/
In your followup comment, you mention unlocked files. That would get
you around the link problem. You could call `rsync` with `--checksum`
to limit what is transferred, though that might be expensive depending
on how big your files are.
> Are [the annex.hardlink and annex.thin options] annex wide settings?
> (that seems to be the case). Is it possible to apply them at a
> folder level?
You can set them at the repository level in the repo's .git/config.
"""]]

View file

@ -0,0 +1,22 @@
[[!comment format=mdwn
username="dscheffy@c203b7661ec8c1ebd53e52627c84536c5f0c9026"
nickname="dscheffy"
avatar="http://cdn.libravatar.org/avatar/62a3a0bf0e203e3746eedbe48fd13f6d"
subject="comment 4"
date="2020-12-17T18:14:16Z"
content="""
Oh boy -- or should I say, oh \"`man`\"... Now I feel like a bit of an idiot for not checking the actual high level man pages... Thanks for that tip.
I did some experimenting and noticed another thing that I hadn't noticed -- although my binary version is v6.20180227 my annexes were all using the v5 index. That came as a bit of a surprise. Once I upgraded my annex to v6 the annex.thin settings started working.
As for rsync, I had tried the `-c` (`--checksum`) option, but it wasn't dereferencing the links on the target side, so the files still registered as different (at least I think I tried this, but I may go back and check again, because I was doing a lot of different things...) Nevermind, I just checked my history and I never actually tried it -- I had been using
```
rsync -n -v -r -c --delete ai2/ beta:/media/winston/library/lectures/coursera2/ai2/
```
to try to confirm that the contents of the two folders matched so that I'd know I could safely delete my local copy, but that didn't work because of the links. I didn't add the `-L` option until I reversed the direction and ran the command from the server side with `alpha` as the target.
Thanks for all the help -- this should work well enough for my stage folder issue, but it also solves a separate problem that I'd been struggling with for making my photos available to a self hosted photo webserver tool that I was trying out (photoprism). It can't currently handle symlinks and my local drive was getting filled up with all the extra copies of my photos directory tree!
"""]]

View file

@ -0,0 +1,26 @@
[[!comment format=mdwn
username="kyle"
avatar="http://cdn.libravatar.org/avatar/7d6e85cde1422ad60607c87fa87c63f3"
subject="comment 5"
date="2020-12-17T19:05:12Z"
content="""
> As for rsync, I had tried the -c (--checksum) option, but it wasn't
> dereferencing the links on the target side
Just to clarify: My comment was in the context of unlocked files (in
v6+ repos). In that case, symlinks aren't used: the content is kept
in the working tree (and a pointer file is tracked by git).
Also, since it sounds like you may want all files to be unlocked, you
might want to look into `git annex adjust --unlock` to enter an
adjusted branch with all files in an unlocked state.
> it also solves a separate problem that I'd been struggling with for
> making my photos available to a self hosted photo webserver tool
> that I was trying out (photoprism). It can't currently handle
> symlinks [...]
FWIW, if you don't need importing for this use case, I think using
`git annex export` with an rsync special remote configured with
`exporttree=yes` would work well.
"""]]

View file

@ -0,0 +1,9 @@
If two conflicting trees get imported from an import-only remote, it can
end up in conflict, the same as an export conflict. Except, git-annex
export cannot be used to resolve it, as it's import-only.
Seems that git-annex import should be able to resolve the conflict in this
case. Whatever tree it imports is, after all, the tree that's on the
remote now, so just resolve the conflict to say it's the current one
in the export log. But I've not thought through it in detail yet.
--[[Joey]]