From e81e43b8296b6db995f0da1771af86f85986ce5f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 17 Dec 2020 13:12:52 -0400 Subject: [PATCH 01/13] improve comment --- Types/Remote.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Types/Remote.hs b/Types/Remote.hs index 2811f732ac..7b5f1ac0e5 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -63,9 +63,9 @@ data RemoteTypeA a = RemoteType , configParser :: RemoteConfig -> a RemoteConfigParser -- initializes or enables a remote , setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID) - -- check if a remote of this type is able to support export + -- check if a remote of this type is able to support export of trees , exportSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool - -- check if a remote of this type is able to support import + -- check if a remote of this type is able to support import of trees , importSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool } From 49c4d471ff82adccb903c498bbdd86729fc07435 Mon Sep 17 00:00:00 2001 From: kyle Date: Thu, 17 Dec 2020 17:41:36 +0000 Subject: [PATCH 02/13] Added a comment --- ..._d765f93511e0ba446f54f3ad30e91e6f._comment | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_3_d765f93511e0ba446f54f3ad30e91e6f._comment diff --git a/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_3_d765f93511e0ba446f54f3ad30e91e6f._comment b/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_3_d765f93511e0ba446f54f3ad30e91e6f._comment new file mode 100644 index 0000000000..5a0372ca92 --- /dev/null +++ b/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_3_d765f93511e0ba446f54f3ad30e91e6f._comment @@ -0,0 +1,30 @@ +[[!comment format=mdwn + username="kyle" + avatar="http://cdn.libravatar.org/avatar/7d6e85cde1422ad60607c87fa87c63f3" + subject="comment 3" + date="2020-12-17T17:41:35Z" + content=""" +> Is this a case for an rsync remote? (I haven't really figured out +> special remotes yet.) Or is there a typical workflow on the git annex +> side that I could be using to fix this (like `import` rather than +> `add`)? + +I think conceptually that's a good fit. You could set +`importtree=yes` with the special remote and ingest changes with `git +annex import` on beta's side. However, the rsync special remote +doesn't support `importtree` yet. + +https://git-annex.branchable.com/todo/import_tree_from_rsync_special_remote/ + +In your followup comment, you mention unlocked files. That would get +you around the link problem. You could call `rsync` with `--checksum` +to limit what is transferred, though that might be expensive depending +on how big your files are. + +> Are [the annex.hardlink and annex.thin options] annex wide settings? +> (that seems to be the case). Is it possible to apply them at a +> folder level? + +You can set then at the repository level in the repo's .git/config. + +"""]] From 1b5cb77acf3018aacb3a06d1bbc1ee72160a791b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 17 Dec 2020 13:45:07 -0400 Subject: [PATCH 03/13] importtree only remotes are untrusted, same as exporttree remotes Importtree only remotes are new; importtree remotes used to always also be exporttree, so were untrusted. Since an import remote is one that can be edited by something other than git-annex, it's clearly not trustworthy at all. --- Logs/Trust.hs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Logs/Trust.hs b/Logs/Trust.hs index 9899cdaf47..615631b28f 100644 --- a/Logs/Trust.hs +++ b/Logs/Trust.hs @@ -66,11 +66,12 @@ trustMapLoad :: Annex TrustMap trustMapLoad = do overrides <- Annex.getState Annex.forcetrust l <- remoteList - -- Exports are not trusted, since they are not key/value stores. - -- This does not apply to appendonly exports, which are key/value - -- stores. + -- Export/import remotes are not trusted, since they are not + -- key/value stores. (Unless they are appendonly remotes.) + let isexportimport r = Types.Remote.isExportSupported r + <||> Types.Remote.isImportSupported r let untrustworthy r = pure (not (Types.Remote.appendonly r)) - <&&> Types.Remote.isExportSupported r + <&&> isexportimport r exports <- filterM untrustworthy l let exportoverrides = M.fromList $ map (\r -> (Types.Remote.uuid r, UnTrusted)) exports From 4d2cd58ee5a891a517b386c555d032722cc4d97b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 17 Dec 2020 13:46:34 -0400 Subject: [PATCH 04/13] provide missing remote actions for importree only remote Ah, it seemed too easy before when I was implementing importrree only, and it was because all the key-based actions needed to be handled too. Mostly copied from isexport, and this works. It does seem that an import remote could use retrieveExportWithContentIdentifier rather than retrieveExport, and checkPresentExportWithContentIdentifier rather than checkPresentExport, which would both be more accurate. --- Remote/Helper/ExportImport.hs | 120 ++++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 48 deletions(-) diff --git a/Remote/Helper/ExportImport.hs b/Remote/Helper/ExportImport.hs index 27b63bab4d..e755604f1e 100644 --- a/Remote/Helper/ExportImport.hs +++ b/Remote/Helper/ExportImport.hs @@ -106,7 +106,7 @@ adjustExportImport rmt rs = do case (exportTree (config rmt), importTree (config rmt)) of (True, True) -> isimport dbv =<< isexport dbv rmt (True, False) -> notimport =<< isexport dbv rmt - (False, True) -> notexport =<< isimport dbv rmt + (False, True) -> notexport =<< isimportonly dbv rmt (False, False) -> notimport =<< notexport rmt where notexport r = return $ r @@ -122,54 +122,7 @@ adjustExportImport rmt rs = do { importSupported = importUnsupported } } - - isimport dbv r = do - ciddbv <- prepciddb - let keycids k = do - db <- getciddb ciddbv - liftIO $ ContentIdentifier.getContentIdentifiers db rs k - - let checkpresent k loc = - checkPresentExportWithContentIdentifier - (importActions r) - k loc - =<< keycids k - - return $ r - { exportActions = (exportActions r) - { storeExport = \f k loc p -> do - db <- getciddb ciddbv - exportdb <- getexportdb r dbv - oldks <- liftIO $ Export.getExportTreeKey exportdb loc - oldcids <- liftIO $ concat - <$> mapM (ContentIdentifier.getContentIdentifiers db rs) oldks - newcid <- storeExportWithContentIdentifier (importActions r) f k loc oldcids p - withExclusiveLock gitAnnexContentIdentifierLock $ do - liftIO $ ContentIdentifier.recordContentIdentifier db rs newcid k - liftIO $ ContentIdentifier.flushDbQueue db - recordContentIdentifier rs newcid k - , removeExport = \k loc -> - removeExportWithContentIdentifier (importActions r) k loc - =<< keycids k - , removeExportDirectory = removeExportDirectoryWhenEmpty (importActions r) - -- renameExport is optional, and the - -- remote's implementation may - -- lose modifications to the file - -- (by eg copying and then deleting) - -- so don't use it - , renameExport = \_ _ _ -> return Nothing - , checkPresentExport = checkpresent - } - , checkPresent = if appendonly r - then checkPresent r - else \k -> anyM (checkpresent k) - =<< getexportlocs r dbv k - , getInfo = do - is <- getInfo r - return (is++[("import", "yes")]) - } - isexport dbv r = ifM (isExportSupported r) ( isexport' dbv r , notexport r @@ -241,6 +194,77 @@ adjustExportImport rmt rs = do is <- getInfo r return (is++[("export", "yes"), ("exportedtree", unwords ts)]) } + + isimport dbv r = do + ciddbv <- prepciddb + + let keycids k = do + db <- getciddb ciddbv + liftIO $ ContentIdentifier.getContentIdentifiers db rs k + + let checkpresent k loc = + checkPresentExportWithContentIdentifier + (importActions r) + k loc + =<< keycids k + + return $ r + { exportActions = (exportActions r) + { storeExport = \f k loc p -> do + db <- getciddb ciddbv + exportdb <- getexportdb r dbv + oldks <- liftIO $ Export.getExportTreeKey exportdb loc + oldcids <- liftIO $ concat + <$> mapM (ContentIdentifier.getContentIdentifiers db rs) oldks + newcid <- storeExportWithContentIdentifier (importActions r) f k loc oldcids p + withExclusiveLock gitAnnexContentIdentifierLock $ do + liftIO $ ContentIdentifier.recordContentIdentifier db rs newcid k + liftIO $ ContentIdentifier.flushDbQueue db + recordContentIdentifier rs newcid k + , removeExport = \k loc -> + removeExportWithContentIdentifier (importActions r) k loc + =<< keycids k + , removeExportDirectory = removeExportDirectoryWhenEmpty (importActions r) + -- renameExport is optional, and the + -- remote's implementation may + -- lose modifications to the file + -- (by eg copying and then deleting) + -- so don't use it + , renameExport = \_ _ _ -> return Nothing + , checkPresentExport = checkpresent + } + , checkPresent = if appendonly r + then checkPresent r + else \k -> anyM (checkpresent k) + =<< getexportlocs r dbv k + , getInfo = do + is <- getInfo r + return (is++[("import", "yes")]) + } + + isimportonly dbv r' = isimport dbv r' >>= \r -> return $ r + { storeKey = \_ _ _ -> + giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it" + , retrieveKeyFile = \k af dest p -> + let retrieveexport = retrieveKeyFileFromExport r dbv k af dest p + in if appendonly r + then retrieveKeyFile r k af dest p + `catchNonAsync` const retrieveexport + else retrieveexport + , retrieveKeyFileCheap = if appendonly r + then retrieveKeyFileCheap r + else Nothing + , removeKey = \_k -> giveup "dropping content from this remote is not supported because it is configured with importtree=yes and without exporttree=yes" + , lockContent = if appendonly r + then lockContent r + else Nothing + , checkPresent = if appendonly r + then checkPresent r + else \k -> anyM (checkPresentExport (exportActions r) k) + =<< getexportlocs r dbv k + , checkPresentCheap = False + , mkUnavailable = return Nothing + } prepciddb = do lcklckv <- liftIO newEmptyTMVarIO From c53057caa3e5e2114d6e448f1d1b40abeb85cb53 Mon Sep 17 00:00:00 2001 From: "dscheffy@c203b7661ec8c1ebd53e52627c84536c5f0c9026" Date: Thu, 17 Dec 2020 18:14:16 +0000 Subject: [PATCH 05/13] Added a comment --- ..._62cf96d62228b52bf7dd52eab0b94f67._comment | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_4_62cf96d62228b52bf7dd52eab0b94f67._comment diff --git a/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_4_62cf96d62228b52bf7dd52eab0b94f67._comment b/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_4_62cf96d62228b52bf7dd52eab0b94f67._comment new file mode 100644 index 0000000000..f61165767e --- /dev/null +++ b/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_4_62cf96d62228b52bf7dd52eab0b94f67._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="dscheffy@c203b7661ec8c1ebd53e52627c84536c5f0c9026" + nickname="dscheffy" + avatar="http://cdn.libravatar.org/avatar/62a3a0bf0e203e3746eedbe48fd13f6d" + subject="comment 4" + date="2020-12-17T18:14:16Z" + content=""" +Oh boy -- or should I say, oh \"`man`\"... Now I feel like a bit of an idiot for not checking the actual high level man pages... Thanks for that tip. + +I did some experimenting and noticed another thing that I hadn't noticed -- although my binary version is v6.20180227 my annexes were all using the v5 index. That came as a bit of a surprise. Once I upgraded my annex to v6 the annex.thin settings started working. + +As for rsync, I had tried the `-c` (`--checksum`) option, but it wasn't dereferencing the links on the target side, so the files still registered as different (at least I think I tried this, but I may go back and check again, because I was doing a lot of different things...) Nevermind, I just checked my history and I never actually tried it -- I had been using + +``` +rsync -n -v -r -c --delete ai2/ beta:/media/winston/library/lectures/coursera2/ai2/ +``` + +to try to confirm that the contents of the two folders matched so that I'd know I could safely delete my local copy, but that didn't work because of the links. I didn't add the `-L` option until I reversed the direction and ran the command from the server side with `alpha` as the target. + +Thanks for all the help -- this should work well enough for my stage folder issue, but it also solves a separate problem that I'd been struggling with for making my photos available to a self hosted photo webserver tool that I was trying out (photoprism). It can't currently handle symlinks and my local drive was getting filled up with all the extra copies of my photos directory tree! + +"""]] From ceda8c0066e87d560bab5f7088b044b57f45b333 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 17 Dec 2020 14:01:42 -0400 Subject: [PATCH 06/13] refactor common code --- Remote/Helper/ExportImport.hs | 53 +++++++++++++++-------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/Remote/Helper/ExportImport.hs b/Remote/Helper/ExportImport.hs index e755604f1e..439c7d854d 100644 --- a/Remote/Helper/ExportImport.hs +++ b/Remote/Helper/ExportImport.hs @@ -128,7 +128,7 @@ adjustExportImport rmt rs = do , notexport r ) - isexport' dbv r = return $ r + isexport' dbv r = return $ isimportorexport $ r -- Storing a key on an export could be implemented, -- but it would perform unncessary work -- when another repository has already stored the @@ -151,9 +151,6 @@ adjustExportImport rmt rs = do then retrieveKeyFile r k af dest p `catchNonAsync` const retrieveexport else retrieveexport - , retrieveKeyFileCheap = if appendonly r - then retrieveKeyFileCheap r - else Nothing -- Removing a key from an export would need to -- change the tree in the export log to not include -- the file. Otherwise, conflicts when removing @@ -161,14 +158,6 @@ adjustExportImport rmt rs = do -- There does not seem to be a good use case for -- removing a key from an export in any case. , removeKey = \_k -> giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove" - -- Can't lock content on exports, since they're - -- not key/value stores, and someone else could - -- change what's exported to a file at any time. - -- - -- (except for appendonly remotes) - , lockContent = if appendonly r - then lockContent r - else Nothing -- Check if any of the files a key was exported to -- are present. This doesn't guarantee the export -- contains the right content, which is why export @@ -180,21 +169,13 @@ adjustExportImport rmt rs = do then checkPresent r else \k -> anyM (checkPresentExport (exportActions r) k) =<< getexportlocs r dbv k - -- checkPresent from an export is more expensive - -- than otherwise, so not cheap. Also, this - -- avoids things that look at checkPresentCheap and - -- silently skip non-present files from behaving - -- in confusing ways when there's an export - -- conflict. - , checkPresentCheap = False - , mkUnavailable = return Nothing , getInfo = do ts <- map fromRef . exportedTreeishes <$> getExport (uuid r) is <- getInfo r return (is++[("export", "yes"), ("exportedtree", unwords ts)]) } - + isimport dbv r = do ciddbv <- prepciddb @@ -208,7 +189,7 @@ adjustExportImport rmt rs = do k loc =<< keycids k - return $ r + return $ isimportorexport $ r { exportActions = (exportActions r) { storeExport = \f k loc p -> do db <- getciddb ciddbv @@ -251,18 +232,30 @@ adjustExportImport rmt rs = do then retrieveKeyFile r k af dest p `catchNonAsync` const retrieveexport else retrieveexport + , removeKey = \_k -> giveup "dropping content from this remote is not supported because it is configured with importtree=yes and without exporttree=yes" + } + + isimportorexport r = r + -- Can't lock content on import/export, since they're + -- not key/value stores, and someone else could + -- change what's exported to a file at any time. + -- + -- (except for appendonly remotes) + { lockContent = if appendonly r + then lockContent r + else Nothing , retrieveKeyFileCheap = if appendonly r then retrieveKeyFileCheap r else Nothing - , removeKey = \_k -> giveup "dropping content from this remote is not supported because it is configured with importtree=yes and without exporttree=yes" - , lockContent = if appendonly r - then lockContent r - else Nothing - , checkPresent = if appendonly r - then checkPresent r - else \k -> anyM (checkPresentExport (exportActions r) k) - =<< getexportlocs r dbv k + -- checkPresent from an export is more expensive + -- than otherwise, so not cheap. Also, this + -- avoids things that look at checkPresentCheap and + -- silently skip non-present files from behaving + -- in confusing ways when there's an export + -- conflict (or an import conflict). , checkPresentCheap = False + -- git-annex testremote cannot be used to test + -- import/export since it stores keys. , mkUnavailable = return Nothing } From 48da7e002d1c2fd7b39f4b27cc7717455d3e2a5b Mon Sep 17 00:00:00 2001 From: kyle Date: Thu, 17 Dec 2020 19:05:13 +0000 Subject: [PATCH 07/13] Added a comment --- ..._40e4e9fe7d301b5e75140136d0132860._comment | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_5_40e4e9fe7d301b5e75140136d0132860._comment diff --git a/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_5_40e4e9fe7d301b5e75140136d0132860._comment b/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_5_40e4e9fe7d301b5e75140136d0132860._comment new file mode 100644 index 0000000000..d797576390 --- /dev/null +++ b/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_5_40e4e9fe7d301b5e75140136d0132860._comment @@ -0,0 +1,26 @@ +[[!comment format=mdwn + username="kyle" + avatar="http://cdn.libravatar.org/avatar/7d6e85cde1422ad60607c87fa87c63f3" + subject="comment 5" + date="2020-12-17T19:05:12Z" + content=""" +> As for rsync, I had tried the -c (--checksum) option, but it wasn't +> dereferencing the links on the target side + +Just to clarify: My comment was in the context of unlocked files (in +v6+ repos). In that case, symlinks aren't used: the content is kept +in the working tree (and a pointer file is tracked by git). + +Also, since it sounds like you may want all files to be unlocked, you +might want to look into `git annex adjust --unlock` to enter an +adjusted with all files in an unlocked state. + +> it also solves a separate problem that I'd been struggling with for +> making my photos available to a self hosted photo webserver tool +> that I was trying out (photoprism). It can't currently handle +> symlinks [...] + +FWIW, if you don't need importing for this use case, I think using +`git annex export` with an rsync special remote configured with +`exporttree=yes` would work well. +"""]] From 5946e7136e88cf450071e9e47af0f6db90aea09e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 17 Dec 2020 15:31:22 -0400 Subject: [PATCH 08/13] force verification after getting file from export remote This way, if annex.verify is disabled, it's still checked, since this is not a key/value store, it has to be checked. --- Remote/Helper/ExportImport.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Remote/Helper/ExportImport.hs b/Remote/Helper/ExportImport.hs index 439c7d854d..84ed778a20 100644 --- a/Remote/Helper/ExportImport.hs +++ b/Remote/Helper/ExportImport.hs @@ -333,6 +333,6 @@ adjustExportImport rmt rs = do ) (l:_) -> do retrieveExport (exportActions r) k l dest p - return UnVerified + return MustVerify , giveup $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ " backend" ) From f2ecc6e0daddf563fc6d150d86e4508193468570 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 17 Dec 2020 15:52:12 -0400 Subject: [PATCH 09/13] import remotes use ContentIdentifier for getting and checking content This is better than using the equivilant actions for export remotes, especially for getting content, since the ContentIdentifier checking means we can be sure (enough) that the content is valid to not force verification of content. Which allows getting keys of types that cannot be verified. Also, reorganized the internals of adjustExportImport which was becoming very hard to follow. Now it's clear what each method does in each case. --- Remote/Helper/ExportImport.hs | 322 +++++++++++++++++----------------- 1 file changed, 164 insertions(+), 158 deletions(-) diff --git a/Remote/Helper/ExportImport.hs b/Remote/Helper/ExportImport.hs index 84ed778a20..412f297066 100644 --- a/Remote/Helper/ExportImport.hs +++ b/Remote/Helper/ExportImport.hs @@ -101,164 +101,134 @@ adjustExportImportRemoteType rt = rt { setup = setup' } -- | Adjust a remote to support exporttree=yes and/or importree=yes. adjustExportImport :: Remote -> RemoteStateHandle -> Annex Remote -adjustExportImport rmt rs = do +adjustExportImport r rs = do + isexport <- pure (exportTree (config r)) <&&> isExportSupported r + isimport <- pure (importTree (config r)) <&&> isImportSupported r + let normal = not isexport && not isimport + let iskeyvaluestore = normal || appendonly r dbv <- prepdbv - case (exportTree (config rmt), importTree (config rmt)) of - (True, True) -> isimport dbv =<< isexport dbv rmt - (True, False) -> notimport =<< isexport dbv rmt - (False, True) -> notexport =<< isimportonly dbv rmt - (False, False) -> notimport =<< notexport rmt - where - notexport r = return $ r - { exportActions = exportUnsupported - , remotetype = (remotetype r) - { exportSupported = exportUnsupported + ciddbv <- prepciddb + return $ r + { remotetype = (remotetype r) + { exportSupported = if isexport + then exportSupported (remotetype r) + else exportUnsupported + , importSupported = if isimport + then importSupported (remotetype r) + else importUnsupported } - } - - notimport r = return $ r - { importActions = importUnsupported - , remotetype = (remotetype r) - { importSupported = importUnsupported - } - } - - isexport dbv r = ifM (isExportSupported r) - ( isexport' dbv r - , notexport r - ) - - isexport' dbv r = return $ isimportorexport $ r - -- Storing a key on an export could be implemented, - -- but it would perform unncessary work - -- when another repository has already stored the - -- key, and the local repository does not know - -- about it. To avoid unnecessary costs, don't do it. - { storeKey = \_ _ _ -> - giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it" - -- Keys can be retrieved using retrieveExport, - -- but since that retrieves from a path in the - -- remote that another writer could have replaced - -- with content not of the requested key, - -- the content has to be strongly verified. - -- - -- appendonly remotes have a key/value store, - -- so don't need to use retrieveExport. However, - -- fall back to it if retrieveKeyFile fails. - , retrieveKeyFile = \k af dest p -> - let retrieveexport = retrieveKeyFileFromExport r dbv k af dest p - in if appendonly r - then retrieveKeyFile r k af dest p - `catchNonAsync` const retrieveexport - else retrieveexport - -- Removing a key from an export would need to - -- change the tree in the export log to not include - -- the file. Otherwise, conflicts when removing - -- files would not be dealt with correctly. - -- There does not seem to be a good use case for - -- removing a key from an export in any case. - , removeKey = \_k -> giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove" - -- Check if any of the files a key was exported to - -- are present. This doesn't guarantee the export - -- contains the right content, which is why export - -- remotes are untrusted. - -- - -- (but appendonly remotes work the same as any - -- non-export remote) - , checkPresent = if appendonly r - then checkPresent r - else \k -> anyM (checkPresentExport (exportActions r) k) - =<< getexportlocs r dbv k - , getInfo = do - ts <- map fromRef . exportedTreeishes - <$> getExport (uuid r) - is <- getInfo r - return (is++[("export", "yes"), ("exportedtree", unwords ts)]) - } - - isimport dbv r = do - ciddbv <- prepciddb - - let keycids k = do - db <- getciddb ciddbv - liftIO $ ContentIdentifier.getContentIdentifiers db rs k - - let checkpresent k loc = - checkPresentExportWithContentIdentifier - (importActions r) - k loc - =<< keycids k - - return $ isimportorexport $ r - { exportActions = (exportActions r) - { storeExport = \f k loc p -> do - db <- getciddb ciddbv - exportdb <- getexportdb r dbv - oldks <- liftIO $ Export.getExportTreeKey exportdb loc - oldcids <- liftIO $ concat - <$> mapM (ContentIdentifier.getContentIdentifiers db rs) oldks - newcid <- storeExportWithContentIdentifier (importActions r) f k loc oldcids p - withExclusiveLock gitAnnexContentIdentifierLock $ do - liftIO $ ContentIdentifier.recordContentIdentifier db rs newcid k - liftIO $ ContentIdentifier.flushDbQueue db - recordContentIdentifier rs newcid k - , removeExport = \k loc -> - removeExportWithContentIdentifier (importActions r) k loc - =<< keycids k - , removeExportDirectory = removeExportDirectoryWhenEmpty (importActions r) - -- renameExport is optional, and the - -- remote's implementation may - -- lose modifications to the file - -- (by eg copying and then deleting) - -- so don't use it - , renameExport = \_ _ _ -> return Nothing - , checkPresentExport = checkpresent - } - , checkPresent = if appendonly r - then checkPresent r - else \k -> anyM (checkpresent k) - =<< getexportlocs r dbv k - , getInfo = do - is <- getInfo r - return (is++[("import", "yes")]) - } - - isimportonly dbv r' = isimport dbv r' >>= \r -> return $ r - { storeKey = \_ _ _ -> - giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it" - , retrieveKeyFile = \k af dest p -> - let retrieveexport = retrieveKeyFileFromExport r dbv k af dest p - in if appendonly r - then retrieveKeyFile r k af dest p - `catchNonAsync` const retrieveexport - else retrieveexport - , removeKey = \_k -> giveup "dropping content from this remote is not supported because it is configured with importtree=yes and without exporttree=yes" - } - - isimportorexport r = r - -- Can't lock content on import/export, since they're - -- not key/value stores, and someone else could - -- change what's exported to a file at any time. - -- - -- (except for appendonly remotes) - { lockContent = if appendonly r + , exportActions = if isexport + then if isimport + then exportActionsForImport dbv ciddbv (exportActions r) + else exportActions r + else exportUnsupported + , importActions = if isimport + then importActions r + else importUnsupported + , storeKey = \k af p -> + -- Storing a key on an export could be implemented, + -- but it would perform unncessary work + -- when another repository has already stored the + -- key, and the local repository does not know + -- about it. To avoid unnecessary costs, don't do it. + if isexport + then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it" + else if isimport + then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it" + else storeKey r k af p + , removeKey = \k -> + -- Removing a key from an export would need to + -- change the tree in the export log to not include + -- the file. Otherwise, conflicts when removing + -- files would not be dealt with correctly. + -- There does not seem to be a good use case for + -- removing a key from an export in any case. + if isexport + then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove" + else if isimport + then giveup "dropping content from this remote is not supported because it is configured with importtree=yes" + else removeKey r k + , lockContent = if iskeyvaluestore then lockContent r else Nothing - , retrieveKeyFileCheap = if appendonly r + , retrieveKeyFile = \k af dest p -> + if isimport + then supportappendonlyretrieve k af dest p $ + retrieveKeyFileFromImport dbv ciddbv k af dest p + else if isexport + then supportappendonlyretrieve k af dest p $ + retrieveKeyFileFromExport dbv k af dest p + else retrieveKeyFile r k af dest p + , retrieveKeyFileCheap = if iskeyvaluestore then retrieveKeyFileCheap r else Nothing + , checkPresent = \k -> if appendonly r + then checkPresent r k + else if isimport + then anyM (checkPresentImport ciddbv k) + =<< getexportlocs dbv k + else if isexport + -- Check if any of the files a key + -- was exported to are present. This + -- doesn't guarantee the export + -- contains the right content, + -- which is why export remotes + -- are untrusted. + then anyM (checkPresentExport (exportActions r) k) + =<< getexportlocs dbv k + else checkPresent r k -- checkPresent from an export is more expensive -- than otherwise, so not cheap. Also, this -- avoids things that look at checkPresentCheap and -- silently skip non-present files from behaving -- in confusing ways when there's an export -- conflict (or an import conflict). - , checkPresentCheap = False + , checkPresentCheap = if normal + then checkPresentCheap r + else False -- git-annex testremote cannot be used to test -- import/export since it stores keys. - , mkUnavailable = return Nothing + , mkUnavailable = if normal + then mkUnavailable r + else return Nothing + , getInfo = do + is <- getInfo r + is' <- if isexport + then do + ts <- map fromRef . exportedTreeishes + <$> getExport (uuid r) + return (is++[("export", "yes"), ("exportedtree", unwords ts)]) + else return is + return $ if isimport + then (is'++[("import", "yes")]) + else is' } - + where + -- exportActions adjusted to use the equivilant import actions, + -- which take ContentIdentifiers into account. + exportActionsForImport dbv ciddbv ea = ea + { storeExport = \f k loc p -> do + db <- getciddb ciddbv + exportdb <- getexportdb dbv + oldks <- liftIO $ Export.getExportTreeKey exportdb loc + oldcids <- liftIO $ concat + <$> mapM (ContentIdentifier.getContentIdentifiers db rs) oldks + newcid <- storeExportWithContentIdentifier (importActions r) f k loc oldcids p + withExclusiveLock gitAnnexContentIdentifierLock $ do + liftIO $ ContentIdentifier.recordContentIdentifier db rs newcid k + liftIO $ ContentIdentifier.flushDbQueue db + recordContentIdentifier rs newcid k + , removeExport = \k loc -> + removeExportWithContentIdentifier (importActions r) k loc + =<< getkeycids ciddbv k + , removeExportDirectory = removeExportDirectoryWhenEmpty (importActions r) + -- renameExport is optional, and the remote's + -- implementation may lose modifications to the file + -- (by eg copying and then deleting) so don't use it + , renameExport = \_ _ _ -> return Nothing + , checkPresentExport = checkPresentImport ciddbv + } + prepciddb = do lcklckv <- liftIO newEmptyTMVarIO dbtv <- liftIO newEmptyTMVarIO @@ -294,14 +264,14 @@ adjustExportImport rmt rs = do -- After opening the database, check if the export log is -- different than the database, and update the database, to notice -- when an export has been updated from another repository. - getexportdb r (dbv, lcklckv, exportinconflict) = + getexportdb (dbv, lcklckv, exportinconflict) = liftIO (atomically (tryReadTMVar dbv)) >>= \case Just db -> return db -- let only one thread take the lock Nothing -> ifM (liftIO $ atomically $ tryPutTMVar lcklckv ()) ( do db <- Export.openDb (uuid r) - updateexportdb db exportinconflict r + updateexportdb db exportinconflict liftIO $ atomically $ putTMVar dbv db return db -- loser waits for winner to open the db and @@ -311,7 +281,7 @@ adjustExportImport rmt rs = do getexportinconflict (_, _, v) = v - updateexportdb db exportinconflict r = + updateexportdb db exportinconflict = Export.updateExportTreeFromLog db >>= \case Export.ExportUpdateSuccess -> return () Export.ExportUpdateConflict -> do @@ -319,20 +289,56 @@ adjustExportImport rmt rs = do liftIO $ atomically $ writeTVar exportinconflict True - getexportlocs r dbv k = do - db <- getexportdb r dbv + getexportlocs dbv k = do + db <- getexportdb dbv liftIO $ Export.getExportTree db k + + getfirstexportloc dbv k = do + db <- getexportdb dbv + liftIO $ Export.getExportTree db k >>= \case + [] -> ifM (atomically $ readTVar $ getexportinconflict dbv) + ( giveup "unknown export location, likely due to the export conflict" + , giveup "unknown export location" + ) + (l:_) -> return l + + getkeycids ciddbv k = do + db <- getciddb ciddbv + liftIO $ ContentIdentifier.getContentIdentifiers db rs k - retrieveKeyFileFromExport r dbv k _af dest p = ifM (isVerifiable k) + -- Keys can be retrieved using retrieveExport, but since that + -- retrieves from a path in the remote that another writer could + -- have replaced with content not of the requested key, the content + -- has to be strongly verified. + retrieveKeyFileFromExport dbv k _af dest p = ifM (isVerifiable k) ( do - locs <- getexportlocs r dbv k - case locs of - [] -> ifM (liftIO $ atomically $ readTVar $ getexportinconflict dbv) - ( giveup "unknown export location, likely due to the export conflict" - , giveup "unknown export location" - ) - (l:_) -> do - retrieveExport (exportActions r) k l dest p - return MustVerify + l <- getfirstexportloc dbv k + retrieveExport (exportActions r) k l dest p + return MustVerify , giveup $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ " backend" ) + + retrieveKeyFileFromImport dbv ciddbv k af dest p = + getkeycids ciddbv k >>= \case + (cid:_) -> do + l <- getfirstexportloc dbv k + void $ retrieveExportWithContentIdentifier (importActions r) l cid dest (pure k) p + return UnVerified + -- In case a content identifier is somehow missing, + -- try this instead. + [] -> retrieveKeyFileFromExport dbv k af dest p + + -- appendonly remotes have a key/value store, so can use + -- the usual retrieveKeyFile, rather than an import/export + -- variant. However, fall back to that if retrieveKeyFile fails. + supportappendonlyretrieve k af dest p a + | appendonly r = + retrieveKeyFile r k af dest p + `catchNonAsync` const a + | otherwise = a + + checkPresentImport ciddbv k loc = + checkPresentExportWithContentIdentifier + (importActions r) + k loc + =<< getkeycids ciddbv k From a4451ac391d4aa832fca35f442a9bde713ff999f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 17 Dec 2020 15:58:14 -0400 Subject: [PATCH 10/13] add missing space --- Annex/Export.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Annex/Export.hs b/Annex/Export.hs index 0d2ff65235..98ccab1515 100644 --- a/Annex/Export.hs +++ b/Annex/Export.hs @@ -47,6 +47,6 @@ warnExportImportConflict r = do True -> "exported to and/or imported from" False -> "exported to" toplevelWarning True $ - "Conflict detected. Different trees have been " ++ ops ++ + "Conflict detected. Different trees have been " ++ ops ++ " " ++ Remote.name r ++ ". Use git-annex export to resolve this conflict." From 77aedbef8bd755754f1395dec8f5c57f6d6e576f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 17 Dec 2020 16:25:02 -0400 Subject: [PATCH 11/13] fix call to warnExportImportConflict That needs a Remote that has the right export/import set up, not the input Remote, which does not yet. --- Remote/Helper/ExportImport.hs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Remote/Helper/ExportImport.hs b/Remote/Helper/ExportImport.hs index 412f297066..9ff6f92800 100644 --- a/Remote/Helper/ExportImport.hs +++ b/Remote/Helper/ExportImport.hs @@ -104,11 +104,7 @@ adjustExportImport :: Remote -> RemoteStateHandle -> Annex Remote adjustExportImport r rs = do isexport <- pure (exportTree (config r)) <&&> isExportSupported r isimport <- pure (importTree (config r)) <&&> isImportSupported r - let normal = not isexport && not isimport - let iskeyvaluestore = normal || appendonly r - dbv <- prepdbv - ciddbv <- prepciddb - return $ r + let r' = r { remotetype = (remotetype r) { exportSupported = if isexport then exportSupported (remotetype r) @@ -117,7 +113,19 @@ adjustExportImport r rs = do then importSupported (remotetype r) else importUnsupported } - , exportActions = if isexport + } + if not isexport && not isimport + then return r' + else adjustExportImport' isexport isimport r' rs + +adjustExportImport' :: Bool -> Bool -> Remote -> RemoteStateHandle -> Annex Remote +adjustExportImport' isexport isimport r rs = do + dbv <- prepdbv + ciddbv <- prepciddb + let normal = not isexport && not isimport + let iskeyvaluestore = normal || appendonly r + return $ r + { exportActions = if isexport then if isimport then exportActionsForImport dbv ciddbv (exportActions r) else exportActions r From 400bdb48dbbf98937db8b4667aa10da5c2f06809 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 17 Dec 2020 16:25:46 -0400 Subject: [PATCH 12/13] update warnExportImportConflict for import-only remotes --- Annex/Export.hs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/Annex/Export.hs b/Annex/Export.hs index 98ccab1515..b7e83dd749 100644 --- a/Annex/Export.hs +++ b/Annex/Export.hs @@ -43,10 +43,15 @@ exportKey sha = mk <$> catKey sha warnExportImportConflict :: Remote -> Annex () warnExportImportConflict r = do - ops <- Remote.isImportSupported r >>= return . \case - True -> "exported to and/or imported from" - False -> "exported to" - toplevelWarning True $ - "Conflict detected. Different trees have been " ++ ops ++ " " ++ - Remote.name r ++ - ". Use git-annex export to resolve this conflict." + isimport <- Remote.isImportSupported r + isexport <- Remote.isExportSupported r + let (ops, resolvcmd) = case (isexport, isimport) of + (False, True) -> ("imported from", "git-annex import") + (True, False) -> ("exported to", "git-annex export") + _ -> ("exported to and/or imported from", "git-annex export") + toplevelWarning True $ unwords + [ "Conflict detected. Different trees have been" + , ops, Remote.name r ++ ". Use" + , resolvcmd + , "to resolve this conflict." + ] From 4c63cab4677e83f04d6bf467b10949374a0f1ed5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 17 Dec 2020 16:29:51 -0400 Subject: [PATCH 13/13] todo --- ...ort_only_remotes_need_a_way_to_resolve_conflicts.mdwn | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 doc/todo/import_only_remotes_need_a_way_to_resolve_conflicts.mdwn diff --git a/doc/todo/import_only_remotes_need_a_way_to_resolve_conflicts.mdwn b/doc/todo/import_only_remotes_need_a_way_to_resolve_conflicts.mdwn new file mode 100644 index 0000000000..c0ac777658 --- /dev/null +++ b/doc/todo/import_only_remotes_need_a_way_to_resolve_conflicts.mdwn @@ -0,0 +1,9 @@ +If two conflicting trees get imported from an import-only remote, it can +end up in conflict, the same as an export conflict. Except, git-annex +export cannot be used to resolve it, as it's import-only. + +Seems that git-annex import should be able to resolve the conflict in this +case. What ever tree it imports is, after all, the tree that's on the +remote now, so just resolve the conflict to say it's the current one +in the export log. But I've not thought through it in detail yet. +--[[Joey]]