diff --git a/Annex/Export.hs b/Annex/Export.hs index 0d2ff65235..b7e83dd749 100644 --- a/Annex/Export.hs +++ b/Annex/Export.hs @@ -43,10 +43,15 @@ exportKey sha = mk <$> catKey sha warnExportImportConflict :: Remote -> Annex () warnExportImportConflict r = do - ops <- Remote.isImportSupported r >>= return . \case - True -> "exported to and/or imported from" - False -> "exported to" - toplevelWarning True $ - "Conflict detected. Different trees have been " ++ ops ++ - Remote.name r ++ - ". Use git-annex export to resolve this conflict." + isimport <- Remote.isImportSupported r + isexport <- Remote.isExportSupported r + let (ops, resolvcmd) = case (isexport, isimport) of + (False, True) -> ("imported from", "git-annex import") + (True, False) -> ("exported to", "git-annex export") + _ -> ("exported to and/or imported from", "git-annex export") + toplevelWarning True $ unwords + [ "Conflict detected. Different trees have been" + , ops, Remote.name r ++ ". Use" + , resolvcmd + , "to resolve this conflict." + ] diff --git a/Logs/Trust.hs b/Logs/Trust.hs index 9899cdaf47..615631b28f 100644 --- a/Logs/Trust.hs +++ b/Logs/Trust.hs @@ -66,11 +66,12 @@ trustMapLoad :: Annex TrustMap trustMapLoad = do overrides <- Annex.getState Annex.forcetrust l <- remoteList - -- Exports are not trusted, since they are not key/value stores. - -- This does not apply to appendonly exports, which are key/value - -- stores. + -- Export/import remotes are not trusted, since they are not + -- key/value stores. (Unless they are appendonly remotes.) + let isexportimport r = Types.Remote.isExportSupported r + <||> Types.Remote.isImportSupported r let untrustworthy r = pure (not (Types.Remote.appendonly r)) - <&&> Types.Remote.isExportSupported r + <&&> isexportimport r exports <- filterM untrustworthy l let exportoverrides = M.fromList $ map (\r -> (Types.Remote.uuid r, UnTrusted)) exports diff --git a/Remote/Helper/ExportImport.hs b/Remote/Helper/ExportImport.hs index 27b63bab4d..9ff6f92800 100644 --- a/Remote/Helper/ExportImport.hs +++ b/Remote/Helper/ExportImport.hs @@ -101,147 +101,142 @@ adjustExportImportRemoteType rt = rt { setup = setup' } -- | Adjust a remote to support exporttree=yes and/or importree=yes. adjustExportImport :: Remote -> RemoteStateHandle -> Annex Remote -adjustExportImport rmt rs = do +adjustExportImport r rs = do + isexport <- pure (exportTree (config r)) <&&> isExportSupported r + isimport <- pure (importTree (config r)) <&&> isImportSupported r + let r' = r + { remotetype = (remotetype r) + { exportSupported = if isexport + then exportSupported (remotetype r) + else exportUnsupported + , importSupported = if isimport + then importSupported (remotetype r) + else importUnsupported + } + } + if not isexport && not isimport + then return r' + else adjustExportImport' isexport isimport r' rs + +adjustExportImport' :: Bool -> Bool -> Remote -> RemoteStateHandle -> Annex Remote +adjustExportImport' isexport isimport r rs = do dbv <- prepdbv - case (exportTree (config rmt), importTree (config rmt)) of - (True, True) -> isimport dbv =<< isexport dbv rmt - (True, False) -> notimport =<< isexport dbv rmt - (False, True) -> notexport =<< isimport dbv rmt - (False, False) -> notimport =<< notexport rmt - where - notexport r = return $ r - { exportActions = exportUnsupported - , remotetype = (remotetype r) - { exportSupported = exportUnsupported - } - } - - notimport r = return $ r - { importActions = importUnsupported - , remotetype = (remotetype r) - { importSupported = importUnsupported - } - } - - isimport dbv r = do - ciddbv <- prepciddb - - let keycids k = do - db <- getciddb ciddbv - liftIO $ ContentIdentifier.getContentIdentifiers db rs k - - let checkpresent k loc = - checkPresentExportWithContentIdentifier - (importActions r) - k loc - =<< keycids k - - return $ r - { exportActions = (exportActions r) - { storeExport = \f k loc p -> do - db <- getciddb ciddbv - exportdb <- getexportdb r dbv - oldks <- liftIO $ Export.getExportTreeKey exportdb loc - oldcids <- liftIO $ concat - <$> mapM (ContentIdentifier.getContentIdentifiers db rs) oldks - newcid <- storeExportWithContentIdentifier (importActions r) f k loc oldcids p - withExclusiveLock gitAnnexContentIdentifierLock $ do - liftIO $ ContentIdentifier.recordContentIdentifier db rs newcid k - liftIO $ ContentIdentifier.flushDbQueue db - recordContentIdentifier rs newcid k - , removeExport = \k loc -> - removeExportWithContentIdentifier (importActions r) k loc - =<< keycids k - , removeExportDirectory = removeExportDirectoryWhenEmpty (importActions r) - -- renameExport is optional, and the - -- remote's implementation may - -- lose modifications to the file - -- (by eg copying and then deleting) - -- so don't use it - , renameExport = \_ _ _ -> return Nothing - , checkPresentExport = checkpresent - } - , checkPresent = if appendonly r - then checkPresent r - else \k -> anyM (checkpresent k) - =<< getexportlocs r dbv k - , getInfo = do - is <- getInfo r - return (is++[("import", "yes")]) - } - - isexport dbv r = ifM (isExportSupported r) - ( isexport' dbv r - , notexport r - ) - - isexport' dbv r = return $ r - -- Storing a key on an export could be implemented, - -- but it would perform unncessary work - -- when another repository has already stored the - -- key, and the local repository does not know - -- about it. To avoid unnecessary costs, don't do it. - { storeKey = \_ _ _ -> - giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it" - -- Keys can be retrieved using retrieveExport, - -- but since that retrieves from a path in the - -- remote that another writer could have replaced - -- with content not of the requested key, - -- the content has to be strongly verified. - -- - -- appendonly remotes have a key/value store, - -- so don't need to use retrieveExport. However, - -- fall back to it if retrieveKeyFile fails. - , retrieveKeyFile = \k af dest p -> - let retrieveexport = retrieveKeyFileFromExport r dbv k af dest p - in if appendonly r - then retrieveKeyFile r k af dest p - `catchNonAsync` const retrieveexport - else retrieveexport - , retrieveKeyFileCheap = if appendonly r - then retrieveKeyFileCheap r - else Nothing - -- Removing a key from an export would need to - -- change the tree in the export log to not include - -- the file. Otherwise, conflicts when removing - -- files would not be dealt with correctly. - -- There does not seem to be a good use case for - -- removing a key from an export in any case. - , removeKey = \_k -> giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove" - -- Can't lock content on exports, since they're - -- not key/value stores, and someone else could - -- change what's exported to a file at any time. - -- - -- (except for appendonly remotes) - , lockContent = if appendonly r + ciddbv <- prepciddb + let normal = not isexport && not isimport + let iskeyvaluestore = normal || appendonly r + return $ r + { exportActions = if isexport + then if isimport + then exportActionsForImport dbv ciddbv (exportActions r) + else exportActions r + else exportUnsupported + , importActions = if isimport + then importActions r + else importUnsupported + , storeKey = \k af p -> + -- Storing a key on an export could be implemented, + -- but it would perform unncessary work + -- when another repository has already stored the + -- key, and the local repository does not know + -- about it. To avoid unnecessary costs, don't do it. + if isexport + then giveup "remote is configured with exporttree=yes; use `git-annex export` to store content on it" + else if isimport + then giveup "remote is configured with importtree=yes and without exporttree=yes; cannot modify content stored on it" + else storeKey r k af p + , removeKey = \k -> + -- Removing a key from an export would need to + -- change the tree in the export log to not include + -- the file. Otherwise, conflicts when removing + -- files would not be dealt with correctly. + -- There does not seem to be a good use case for + -- removing a key from an export in any case. + if isexport + then giveup "dropping content from an export is not supported; use `git annex export` to export a tree that lacks the files you want to remove" + else if isimport + then giveup "dropping content from this remote is not supported because it is configured with importtree=yes" + else removeKey r k + , lockContent = if iskeyvaluestore then lockContent r else Nothing - -- Check if any of the files a key was exported to - -- are present. This doesn't guarantee the export - -- contains the right content, which is why export - -- remotes are untrusted. - -- - -- (but appendonly remotes work the same as any - -- non-export remote) - , checkPresent = if appendonly r - then checkPresent r - else \k -> anyM (checkPresentExport (exportActions r) k) - =<< getexportlocs r dbv k + , retrieveKeyFile = \k af dest p -> + if isimport + then supportappendonlyretrieve k af dest p $ + retrieveKeyFileFromImport dbv ciddbv k af dest p + else if isexport + then supportappendonlyretrieve k af dest p $ + retrieveKeyFileFromExport dbv k af dest p + else retrieveKeyFile r k af dest p + , retrieveKeyFileCheap = if iskeyvaluestore + then retrieveKeyFileCheap r + else Nothing + , checkPresent = \k -> if appendonly r + then checkPresent r k + else if isimport + then anyM (checkPresentImport ciddbv k) + =<< getexportlocs dbv k + else if isexport + -- Check if any of the files a key + -- was exported to are present. This + -- doesn't guarantee the export + -- contains the right content, + -- which is why export remotes + -- are untrusted. + then anyM (checkPresentExport (exportActions r) k) + =<< getexportlocs dbv k + else checkPresent r k -- checkPresent from an export is more expensive -- than otherwise, so not cheap. Also, this -- avoids things that look at checkPresentCheap and -- silently skip non-present files from behaving -- in confusing ways when there's an export - -- conflict. - , checkPresentCheap = False - , mkUnavailable = return Nothing + -- conflict (or an import conflict). + , checkPresentCheap = if normal + then checkPresentCheap r + else False + -- git-annex testremote cannot be used to test + -- import/export since it stores keys. + , mkUnavailable = if normal + then mkUnavailable r + else return Nothing , getInfo = do - ts <- map fromRef . exportedTreeishes - <$> getExport (uuid r) is <- getInfo r - return (is++[("export", "yes"), ("exportedtree", unwords ts)]) + is' <- if isexport + then do + ts <- map fromRef . exportedTreeishes + <$> getExport (uuid r) + return (is++[("export", "yes"), ("exportedtree", unwords ts)]) + else return is + return $ if isimport + then (is'++[("import", "yes")]) + else is' } - + where + -- exportActions adjusted to use the equivilant import actions, + -- which take ContentIdentifiers into account. + exportActionsForImport dbv ciddbv ea = ea + { storeExport = \f k loc p -> do + db <- getciddb ciddbv + exportdb <- getexportdb dbv + oldks <- liftIO $ Export.getExportTreeKey exportdb loc + oldcids <- liftIO $ concat + <$> mapM (ContentIdentifier.getContentIdentifiers db rs) oldks + newcid <- storeExportWithContentIdentifier (importActions r) f k loc oldcids p + withExclusiveLock gitAnnexContentIdentifierLock $ do + liftIO $ ContentIdentifier.recordContentIdentifier db rs newcid k + liftIO $ ContentIdentifier.flushDbQueue db + recordContentIdentifier rs newcid k + , removeExport = \k loc -> + removeExportWithContentIdentifier (importActions r) k loc + =<< getkeycids ciddbv k + , removeExportDirectory = removeExportDirectoryWhenEmpty (importActions r) + -- renameExport is optional, and the remote's + -- implementation may lose modifications to the file + -- (by eg copying and then deleting) so don't use it + , renameExport = \_ _ _ -> return Nothing + , checkPresentExport = checkPresentImport ciddbv + } + prepciddb = do lcklckv <- liftIO newEmptyTMVarIO dbtv <- liftIO newEmptyTMVarIO @@ -277,14 +272,14 @@ adjustExportImport rmt rs = do -- After opening the database, check if the export log is -- different than the database, and update the database, to notice -- when an export has been updated from another repository. - getexportdb r (dbv, lcklckv, exportinconflict) = + getexportdb (dbv, lcklckv, exportinconflict) = liftIO (atomically (tryReadTMVar dbv)) >>= \case Just db -> return db -- let only one thread take the lock Nothing -> ifM (liftIO $ atomically $ tryPutTMVar lcklckv ()) ( do db <- Export.openDb (uuid r) - updateexportdb db exportinconflict r + updateexportdb db exportinconflict liftIO $ atomically $ putTMVar dbv db return db -- loser waits for winner to open the db and @@ -294,7 +289,7 @@ adjustExportImport rmt rs = do getexportinconflict (_, _, v) = v - updateexportdb db exportinconflict r = + updateexportdb db exportinconflict = Export.updateExportTreeFromLog db >>= \case Export.ExportUpdateSuccess -> return () Export.ExportUpdateConflict -> do @@ -302,20 +297,56 @@ adjustExportImport rmt rs = do liftIO $ atomically $ writeTVar exportinconflict True - getexportlocs r dbv k = do - db <- getexportdb r dbv + getexportlocs dbv k = do + db <- getexportdb dbv liftIO $ Export.getExportTree db k + + getfirstexportloc dbv k = do + db <- getexportdb dbv + liftIO $ Export.getExportTree db k >>= \case + [] -> ifM (atomically $ readTVar $ getexportinconflict dbv) + ( giveup "unknown export location, likely due to the export conflict" + , giveup "unknown export location" + ) + (l:_) -> return l + + getkeycids ciddbv k = do + db <- getciddb ciddbv + liftIO $ ContentIdentifier.getContentIdentifiers db rs k - retrieveKeyFileFromExport r dbv k _af dest p = ifM (isVerifiable k) + -- Keys can be retrieved using retrieveExport, but since that + -- retrieves from a path in the remote that another writer could + -- have replaced with content not of the requested key, the content + -- has to be strongly verified. + retrieveKeyFileFromExport dbv k _af dest p = ifM (isVerifiable k) ( do - locs <- getexportlocs r dbv k - case locs of - [] -> ifM (liftIO $ atomically $ readTVar $ getexportinconflict dbv) - ( giveup "unknown export location, likely due to the export conflict" - , giveup "unknown export location" - ) - (l:_) -> do - retrieveExport (exportActions r) k l dest p - return UnVerified + l <- getfirstexportloc dbv k + retrieveExport (exportActions r) k l dest p + return MustVerify , giveup $ "exported content cannot be verified due to using the " ++ decodeBS (formatKeyVariety (fromKey keyVariety k)) ++ " backend" ) + + retrieveKeyFileFromImport dbv ciddbv k af dest p = + getkeycids ciddbv k >>= \case + (cid:_) -> do + l <- getfirstexportloc dbv k + void $ retrieveExportWithContentIdentifier (importActions r) l cid dest (pure k) p + return UnVerified + -- In case a content identifier is somehow missing, + -- try this instead. + [] -> retrieveKeyFileFromExport dbv k af dest p + + -- appendonly remotes have a key/value store, so can use + -- the usual retrieveKeyFile, rather than an import/export + -- variant. However, fall back to that if retrieveKeyFile fails. + supportappendonlyretrieve k af dest p a + | appendonly r = + retrieveKeyFile r k af dest p + `catchNonAsync` const a + | otherwise = a + + checkPresentImport ciddbv k loc = + checkPresentExportWithContentIdentifier + (importActions r) + k loc + =<< getkeycids ciddbv k diff --git a/Types/Remote.hs b/Types/Remote.hs index 2811f732ac..7b5f1ac0e5 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -63,9 +63,9 @@ data RemoteTypeA a = RemoteType , configParser :: RemoteConfig -> a RemoteConfigParser -- initializes or enables a remote , setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID) - -- check if a remote of this type is able to support export + -- check if a remote of this type is able to support export of trees , exportSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool - -- check if a remote of this type is able to support import + -- check if a remote of this type is able to support import of trees , importSupported :: ParsedRemoteConfig -> RemoteGitConfig -> a Bool } diff --git a/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_3_d765f93511e0ba446f54f3ad30e91e6f._comment b/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_3_d765f93511e0ba446f54f3ad30e91e6f._comment new file mode 100644 index 0000000000..5a0372ca92 --- /dev/null +++ b/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_3_d765f93511e0ba446f54f3ad30e91e6f._comment @@ -0,0 +1,30 @@ +[[!comment format=mdwn + username="kyle" + avatar="http://cdn.libravatar.org/avatar/7d6e85cde1422ad60607c87fa87c63f3" + subject="comment 3" + date="2020-12-17T17:41:35Z" + content=""" +> Is this a case for an rsync remote? (I haven't really figured out +> special remotes yet.) Or is there a typical workflow on the git annex +> side that I could be using to fix this (like `import` rather than +> `add`)? + +I think conceptually that's a good fit. You could set +`importtree=yes` with the special remote and ingest changes with `git +annex import` on beta's side. However, the rsync special remote +doesn't support `importtree` yet. + +https://git-annex.branchable.com/todo/import_tree_from_rsync_special_remote/ + +In your followup comment, you mention unlocked files. That would get +you around the link problem. You could call `rsync` with `--checksum` +to limit what is transferred, though that might be expensive depending +on how big your files are. + +> Are [the annex.hardlink and annex.thin options] annex wide settings? +> (that seems to be the case). Is it possible to apply them at a +> folder level? + +You can set then at the repository level in the repo's .git/config. + +"""]] diff --git a/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_4_62cf96d62228b52bf7dd52eab0b94f67._comment b/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_4_62cf96d62228b52bf7dd52eab0b94f67._comment new file mode 100644 index 0000000000..f61165767e --- /dev/null +++ b/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_4_62cf96d62228b52bf7dd52eab0b94f67._comment @@ -0,0 +1,22 @@ +[[!comment format=mdwn + username="dscheffy@c203b7661ec8c1ebd53e52627c84536c5f0c9026" + nickname="dscheffy" + avatar="http://cdn.libravatar.org/avatar/62a3a0bf0e203e3746eedbe48fd13f6d" + subject="comment 4" + date="2020-12-17T18:14:16Z" + content=""" +Oh boy -- or should I say, oh \"`man`\"... Now I feel like a bit of an idiot for not checking the actual high level man pages... Thanks for that tip. + +I did some experimenting and noticed another thing that I hadn't noticed -- although my binary version is v6.20180227 my annexes were all using the v5 index. That came as a bit of a surprise. Once I upgraded my annex to v6 the annex.thin settings started working. + +As for rsync, I had tried the `-c` (`--checksum`) option, but it wasn't dereferencing the links on the target side, so the files still registered as different (at least I think I tried this, but I may go back and check again, because I was doing a lot of different things...) Nevermind, I just checked my history and I never actually tried it -- I had been using + +``` +rsync -n -v -r -c --delete ai2/ beta:/media/winston/library/lectures/coursera2/ai2/ +``` + +to try to confirm that the contents of the two folders matched so that I'd know I could safely delete my local copy, but that didn't work because of the links. I didn't add the `-L` option until I reversed the direction and ran the command from the server side with `alpha` as the target. + +Thanks for all the help -- this should work well enough for my stage folder issue, but it also solves a separate problem that I'd been struggling with for making my photos available to a self hosted photo webserver tool that I was trying out (photoprism). It can't currently handle symlinks and my local drive was getting filled up with all the extra copies of my photos directory tree! + +"""]] diff --git a/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_5_40e4e9fe7d301b5e75140136d0132860._comment b/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_5_40e4e9fe7d301b5e75140136d0132860._comment new file mode 100644 index 0000000000..d797576390 --- /dev/null +++ b/doc/forum/rsync_into_annex_without_overwriting_links__63__/comment_5_40e4e9fe7d301b5e75140136d0132860._comment @@ -0,0 +1,26 @@ +[[!comment format=mdwn + username="kyle" + avatar="http://cdn.libravatar.org/avatar/7d6e85cde1422ad60607c87fa87c63f3" + subject="comment 5" + date="2020-12-17T19:05:12Z" + content=""" +> As for rsync, I had tried the -c (--checksum) option, but it wasn't +> dereferencing the links on the target side + +Just to clarify: My comment was in the context of unlocked files (in +v6+ repos). In that case, symlinks aren't used: the content is kept +in the working tree (and a pointer file is tracked by git). + +Also, since it sounds like you may want all files to be unlocked, you +might want to look into `git annex adjust --unlock` to enter an +adjusted with all files in an unlocked state. + +> it also solves a separate problem that I'd been struggling with for +> making my photos available to a self hosted photo webserver tool +> that I was trying out (photoprism). It can't currently handle +> symlinks [...] + +FWIW, if you don't need importing for this use case, I think using +`git annex export` with an rsync special remote configured with +`exporttree=yes` would work well. +"""]] diff --git a/doc/todo/import_only_remotes_need_a_way_to_resolve_conflicts.mdwn b/doc/todo/import_only_remotes_need_a_way_to_resolve_conflicts.mdwn new file mode 100644 index 0000000000..c0ac777658 --- /dev/null +++ b/doc/todo/import_only_remotes_need_a_way_to_resolve_conflicts.mdwn @@ -0,0 +1,9 @@ +If two conflicting trees get imported from an import-only remote, it can +end up in conflict, the same as an export conflict. Except, git-annex +export cannot be used to resolve it, as it's import-only. + +Seems that git-annex import should be able to resolve the conflict in this +case. What ever tree it imports is, after all, the tree that's on the +remote now, so just resolve the conflict to say it's the current one +in the export log. But I've not thought through it in detail yet. +--[[Joey]]