diff --git a/Annex/Import.hs b/Annex/Import.hs index 5d044ea33c..9b2a8ead93 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -384,19 +384,28 @@ importKeys remote importtreeconfig importcontent importablecontents = do importaction return (Right job) - importordownload - | not importcontent = doimport - | otherwise = dodownload + importordownload cidmap db (loc, (cid, sz)) largematcher= do + f <- locworktreefile loc + matcher <- largematcher (fromRawFilePath f) + -- When importing a key is supported, always use it rather + -- than downloading and retrieving a key, to avoid + -- generating trees with different keys for the same content. + let act = if importcontent + then case Remote.importKey ia of + Nothing -> dodownload + Just _ -> if Utility.Matcher.introspect matchNeedsFileContent matcher + then dodownload + else doimport + else doimport + act cidmap db (loc, (cid, sz)) f matcher - doimport cidmap db (loc, (cid, sz)) largematcher = + doimport cidmap db (loc, (cid, sz)) f matcher = case Remote.importKey ia of Nothing -> error "internal" -- checked earlier Just importkey -> do - f <- locworktreefile loc - matcher <- largematcher (fromRawFilePath f) when (Utility.Matcher.introspect matchNeedsFileContent matcher) $ giveup "annex.largefiles configuration examines file contents, so cannot import without content." 
- let mi = MatchingInfo ProvidedInfo + let mi = MatchingInfo ProvidedInfo { providedFilePath = f , providedKey = Nothing , providedFileSize = sz @@ -405,18 +414,24 @@ importKeys remote importtreeconfig importcontent importablecontents = do } islargefile <- checkMatcher' matcher mi mempty metered Nothing sz $ const $ if islargefile - then doimportlarge importkey cidmap db loc cid sz + then doimportlarge importkey cidmap db loc cid sz f else doimportsmall cidmap db loc cid sz - doimportlarge importkey cidmap db loc cid sz p = + doimportlarge importkey cidmap db loc cid sz f p = tryNonAsync importer >>= \case - Right k -> return $ Just (loc, k) + Right (Just (k, True)) -> return $ Just (loc, Right k) + Right _ -> return Nothing Left e -> do warning (show e) return Nothing where importer = do - unsizedk <- importkey loc cid p + unsizedk <- importkey loc cid + -- Don't display progress when generating + -- key, if the content will later be + -- downloaded, which is a more expensive + -- operation generally. + (if importcontent then nullMeterUpdate else p) -- This avoids every remote needing -- to add the size. 
let k = alterKey unsizedk $ \kd -> kd @@ -425,8 +440,27 @@ importKeys remote importtreeconfig importcontent importablecontents = do Nothing -> do recordcidkey cidmap db cid k logChange k (Remote.uuid remote) InfoPresent - return (Right k) + if importcontent + then getcontent k + else return (Just (k, True)) Just msg -> giveup (msg ++ " to import") + + getcontent :: Key -> Annex (Maybe (Key, Bool)) + getcontent k = do + let af = AssociatedFile (Just f) + let downloader p' tmpfile = do + k' <- Remote.retrieveExportWithContentIdentifier + ia loc cid tmpfile + (pure k) + (combineMeterUpdate p' p) + ok <- moveAnnex k' tmpfile + when ok $ + logStatus k InfoPresent + return (Just (k', ok)) + checkDiskSpaceToGet k Nothing $ + notifyTransfer Download af $ + download (Remote.uuid remote) k af stdRetry $ \p' -> + withTmp k $ downloader p' -- The file is small, so is added to git, so while importing -- without content does not retrieve annexed files, it does @@ -440,12 +474,12 @@ importKeys remote importtreeconfig importcontent importablecontents = do case keyGitSha k of Just sha -> do recordcidkey cidmap db cid k - return (Left sha) + return sha Nothing -> error "internal" checkDiskSpaceToGet tmpkey Nothing $ withTmp tmpkey $ \tmpfile -> tryNonAsync (downloader tmpfile) >>= \case - Right v -> return $ Just (loc, v) + Right sha -> return $ Just (loc, Left sha) Left e -> do warning (show e) return Nothing @@ -453,13 +487,12 @@ importKeys remote importtreeconfig importcontent importablecontents = do tmpkey = importKey cid sz mkkey tmpfile = gitShaKey <$> hashFile tmpfile - dodownload cidmap db (loc, (cid, sz)) largematcher = do - f <- locworktreefile loc + dodownload cidmap db (loc, (cid, sz)) f matcher = do let af = AssociatedFile (Just f) let downloader tmpfile p = do k <- Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile - (mkkey f tmpfile) + (mkkey tmpfile) p case keyGitSha k of Nothing -> do @@ -487,8 +520,7 @@ importKeys remote importtreeconfig importcontent 
importablecontents = do where tmpkey = importKey cid sz - mkkey f tmpfile = do - matcher <- largematcher (fromRawFilePath f) + mkkey tmpfile = do let mi = MatchingFile FileInfo { matchFile = f , contentFile = Just (toRawFilePath tmpfile) diff --git a/Command/Sync.hs b/Command/Sync.hs index 2c59a42664..ff17ad88b9 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -225,10 +225,13 @@ seek' o = do , map (withbranch . pullRemote o mergeConfig) gitremotes , [ mergeAnnex ] ] - - whenM (shouldSyncContent o) $ do - mapM_ (withbranch . importRemote o mergeConfig) importremotes + content <- shouldSyncContent o + + forM_ importremotes $ + withbranch . importRemote content o mergeConfig + + when content $ do -- Send content to any exports before other -- repositories, in case that lets content -- be dropped from other repositories. @@ -454,8 +457,8 @@ pullRemote o mergeconfig remote branch = stopUnless (pure $ pullOption o && want ai = ActionItemOther (Just (Remote.name remote)) si = SeekInput [] -importRemote :: SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> CommandSeek -importRemote o mergeconfig remote currbranch +importRemote :: Bool -> SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> CommandSeek +importRemote importcontent o mergeconfig remote currbranch | not (pullOption o) || not wantpull = noop | otherwise = case remoteAnnexTrackingBranch (Remote.gitconfig remote) of Nothing -> noop @@ -465,7 +468,7 @@ importRemote o mergeconfig remote currbranch let subdir = if S.null p then Nothing else Just (asTopFilePath p) - Command.Import.seekRemote remote branch subdir True + Command.Import.seekRemote remote branch subdir importcontent void $ mergeRemote remote currbranch mergeconfig o where wantpull = remoteAnnexPull (Remote.gitconfig remote) diff --git a/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn b/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn index 3434935647..d1b255a8d0 
100644 --- a/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn +++ b/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn @@ -186,7 +186,10 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`. rather than `EXPECTED`. * `IMPORTKEY File` This only needs to be implemented if IMPORTKEYSUPPORTED indicates - it is supported. + it is supported. And if a remote did not support it before, adding + it will make importing the same content as before generate a likely + different tree, which can lead to merge conflicts. So be careful + implementing this. Generates a key by querying the remote for eg, a checksum. (See [[internals/key_format]] for details of how to format a key.) Any kind of key can be generated, depending on what the remote diff --git a/doc/git-annex-import.mdwn b/doc/git-annex-import.mdwn index b136d9f42c..d492d6b32e 100644 --- a/doc/git-annex-import.mdwn +++ b/doc/git-annex-import.mdwn @@ -98,23 +98,7 @@ the tree of files on the remote, even when importing into a subdirectory. With --no-content, git-annex keys are generated from information provided by the special remote, without downloading it. Commands like `git-annex get` can later be used to download files, as desired. - - The --no-content option is not supported by all special remotes, - and the kind of git-annex key that is generated is left up to - each special remote. So while the directory special remote hashes - the file and generates the same key it usually would, other - special remotes may use unusual keys like SHA1, or WORM, depending - on the limitations of the special remote. - - The annex.securehashesonly configuration, if set, will prevent - --no-content importing from a special remote that uses insecure keys. - - Note that a different git tree may be generated when using - --no-content than would be generated when using --content, because - the options cause different kinds of keys to be used when importing - new/changed files. 
So mixing uses of --content and --no-content can - lead to merge conflicts in some situations. Some special remotes, - notably the directory special remote, avoid this problem. + The --no-content option is not supported by all special remotes. # IMPORTING FROM A DIRECTORY diff --git a/doc/git-annex-sync.mdwn b/doc/git-annex-sync.mdwn index 007836d67b..4fae0f7c16 100644 --- a/doc/git-annex-sync.mdwn +++ b/doc/git-annex-sync.mdwn @@ -108,12 +108,14 @@ received. This behavior can be overridden by configuring the preferred content of a repository. See [[git-annex-preferred-content]](1). - When `remote..annex-tracking-branch` is configured for a special remote - and that branch is checked out, syncing content will import changes from - the remote, merge them into the branch, and export any changes that have - been committed to the branch back to the remote. See - See [[git-annex-import]](1) and [[git-annex-export]](1) for details about - how importing and exporting work. + When `remote..annex-tracking-branch` is configured for a special + remote and that branch is checked out, syncing with --content will + import changes from the remote, merge them into the branch, and export + any changes that have been committed to the branch back to the remote. + With --no-content, imports will only be made from special remotes that + support importing without transferring files, and no exports will be done. + See [[git-annex-import]](1) and [[git-annex-export]](1) for details + about how importing and exporting work. * `--content-of=path` `-C path` diff --git a/doc/todo/sync_fast_import.mdwn b/doc/todo/sync_fast_import.mdwn index 4e28911b1c..5ac51a03dd 100644 --- a/doc/todo/sync_fast_import.mdwn +++ b/doc/todo/sync_fast_import.mdwn @@ -35,7 +35,18 @@ git-annex to use a different backend. > content, then the remote could say, don't use importKey by default. > (Or more likely, only the directory remote will be able to support > importKey by default..)
+> +> Problem: When annex.largefiles matches file content, +> cannot use importKey. So then should sync --content not use importKey +> then, risking generating a different tree? Or should it fail, even +> though importing with content is possible? +> +> > Well, different annex.largefiles settings in different clones +> > can already risk generating a different tree on import. So, +> > the former option seems preferable. --- See also, [[todo/import_--no-content_largefiles_conflict]] + +> [[done]] --[[Joey]]