consistently use importKey when available
This avoids import with --no-content and with --content potentially generating two different trees, leading to a merge conflict when run in two different clones of a repo. And it's necessary groundwork to make git-annex sync --no-content import from special remotes that support importKey. Only the directory special remote currently supports importKey, and it generates the same key as git-annex usually does, so there is no behavior change for it. Future special remotes will need to take care when adding importKey, if it generates different keys. Added some warnings about that to comments. This commit was sponsored by Noam Kremen on Patreon.
This commit is contained in:
parent
15c1ee16d9
commit
3eaaec3113
6 changed files with 84 additions and 49 deletions
|
@ -384,19 +384,28 @@ importKeys remote importtreeconfig importcontent importablecontents = do
|
||||||
importaction
|
importaction
|
||||||
return (Right job)
|
return (Right job)
|
||||||
|
|
||||||
importordownload
|
importordownload cidmap db (loc, (cid, sz)) largematcher= do
|
||||||
| not importcontent = doimport
|
f <- locworktreefile loc
|
||||||
| otherwise = dodownload
|
matcher <- largematcher (fromRawFilePath f)
|
||||||
|
-- When importing a key is supported, always use it rather
|
||||||
|
-- than downloading and retrieving a key, to avoid
|
||||||
|
-- generating trees with different keys for the same content.
|
||||||
|
let act = if importcontent
|
||||||
|
then case Remote.importKey ia of
|
||||||
|
Nothing -> dodownload
|
||||||
|
Just _ -> if Utility.Matcher.introspect matchNeedsFileContent matcher
|
||||||
|
then dodownload
|
||||||
|
else doimport
|
||||||
|
else doimport
|
||||||
|
act cidmap db (loc, (cid, sz)) f matcher
|
||||||
|
|
||||||
doimport cidmap db (loc, (cid, sz)) largematcher =
|
doimport cidmap db (loc, (cid, sz)) f matcher =
|
||||||
case Remote.importKey ia of
|
case Remote.importKey ia of
|
||||||
Nothing -> error "internal" -- checked earlier
|
Nothing -> error "internal" -- checked earlier
|
||||||
Just importkey -> do
|
Just importkey -> do
|
||||||
f <- locworktreefile loc
|
|
||||||
matcher <- largematcher (fromRawFilePath f)
|
|
||||||
when (Utility.Matcher.introspect matchNeedsFileContent matcher) $
|
when (Utility.Matcher.introspect matchNeedsFileContent matcher) $
|
||||||
giveup "annex.largefiles configuration examines file contents, so cannot import without content."
|
giveup "annex.largefiles configuration examines file contents, so cannot import without content."
|
||||||
let mi = MatchingInfo ProvidedInfo
|
let mi = MatchingInfo ProvidedInfo
|
||||||
{ providedFilePath = f
|
{ providedFilePath = f
|
||||||
, providedKey = Nothing
|
, providedKey = Nothing
|
||||||
, providedFileSize = sz
|
, providedFileSize = sz
|
||||||
|
@ -405,18 +414,24 @@ importKeys remote importtreeconfig importcontent importablecontents = do
|
||||||
}
|
}
|
||||||
islargefile <- checkMatcher' matcher mi mempty
|
islargefile <- checkMatcher' matcher mi mempty
|
||||||
metered Nothing sz $ const $ if islargefile
|
metered Nothing sz $ const $ if islargefile
|
||||||
then doimportlarge importkey cidmap db loc cid sz
|
then doimportlarge importkey cidmap db loc cid sz f
|
||||||
else doimportsmall cidmap db loc cid sz
|
else doimportsmall cidmap db loc cid sz
|
||||||
|
|
||||||
doimportlarge importkey cidmap db loc cid sz p =
|
doimportlarge importkey cidmap db loc cid sz f p =
|
||||||
tryNonAsync importer >>= \case
|
tryNonAsync importer >>= \case
|
||||||
Right k -> return $ Just (loc, k)
|
Right (Just (k, True)) -> return $ Just (loc, Right k)
|
||||||
|
Right _ -> return Nothing
|
||||||
Left e -> do
|
Left e -> do
|
||||||
warning (show e)
|
warning (show e)
|
||||||
return Nothing
|
return Nothing
|
||||||
where
|
where
|
||||||
importer = do
|
importer = do
|
||||||
unsizedk <- importkey loc cid p
|
unsizedk <- importkey loc cid
|
||||||
|
-- Don't display progress when generating
|
||||||
|
-- key, if the content will later be
|
||||||
|
-- downloaded, which is a more expensive
|
||||||
|
-- operation generally.
|
||||||
|
(if importcontent then nullMeterUpdate else p)
|
||||||
-- This avoids every remote needing
|
-- This avoids every remote needing
|
||||||
-- to add the size.
|
-- to add the size.
|
||||||
let k = alterKey unsizedk $ \kd -> kd
|
let k = alterKey unsizedk $ \kd -> kd
|
||||||
|
@ -425,8 +440,27 @@ importKeys remote importtreeconfig importcontent importablecontents = do
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
recordcidkey cidmap db cid k
|
recordcidkey cidmap db cid k
|
||||||
logChange k (Remote.uuid remote) InfoPresent
|
logChange k (Remote.uuid remote) InfoPresent
|
||||||
return (Right k)
|
if importcontent
|
||||||
|
then getcontent k
|
||||||
|
else return (Just (k, True))
|
||||||
Just msg -> giveup (msg ++ " to import")
|
Just msg -> giveup (msg ++ " to import")
|
||||||
|
|
||||||
|
getcontent :: Key -> Annex (Maybe (Key, Bool))
|
||||||
|
getcontent k = do
|
||||||
|
let af = AssociatedFile (Just f)
|
||||||
|
let downloader p' tmpfile = do
|
||||||
|
k' <- Remote.retrieveExportWithContentIdentifier
|
||||||
|
ia loc cid tmpfile
|
||||||
|
(pure k)
|
||||||
|
(combineMeterUpdate p' p)
|
||||||
|
ok <- moveAnnex k' tmpfile
|
||||||
|
when ok $
|
||||||
|
logStatus k InfoPresent
|
||||||
|
return (Just (k', ok))
|
||||||
|
checkDiskSpaceToGet k Nothing $
|
||||||
|
notifyTransfer Download af $
|
||||||
|
download (Remote.uuid remote) k af stdRetry $ \p' ->
|
||||||
|
withTmp k $ downloader p'
|
||||||
|
|
||||||
-- The file is small, so is added to git, so while importing
|
-- The file is small, so is added to git, so while importing
|
||||||
-- without content does not retrieve annexed files, it does
|
-- without content does not retrieve annexed files, it does
|
||||||
|
@ -440,12 +474,12 @@ importKeys remote importtreeconfig importcontent importablecontents = do
|
||||||
case keyGitSha k of
|
case keyGitSha k of
|
||||||
Just sha -> do
|
Just sha -> do
|
||||||
recordcidkey cidmap db cid k
|
recordcidkey cidmap db cid k
|
||||||
return (Left sha)
|
return sha
|
||||||
Nothing -> error "internal"
|
Nothing -> error "internal"
|
||||||
checkDiskSpaceToGet tmpkey Nothing $
|
checkDiskSpaceToGet tmpkey Nothing $
|
||||||
withTmp tmpkey $ \tmpfile ->
|
withTmp tmpkey $ \tmpfile ->
|
||||||
tryNonAsync (downloader tmpfile) >>= \case
|
tryNonAsync (downloader tmpfile) >>= \case
|
||||||
Right v -> return $ Just (loc, v)
|
Right sha -> return $ Just (loc, Left sha)
|
||||||
Left e -> do
|
Left e -> do
|
||||||
warning (show e)
|
warning (show e)
|
||||||
return Nothing
|
return Nothing
|
||||||
|
@ -453,13 +487,12 @@ importKeys remote importtreeconfig importcontent importablecontents = do
|
||||||
tmpkey = importKey cid sz
|
tmpkey = importKey cid sz
|
||||||
mkkey tmpfile = gitShaKey <$> hashFile tmpfile
|
mkkey tmpfile = gitShaKey <$> hashFile tmpfile
|
||||||
|
|
||||||
dodownload cidmap db (loc, (cid, sz)) largematcher = do
|
dodownload cidmap db (loc, (cid, sz)) f matcher = do
|
||||||
f <- locworktreefile loc
|
|
||||||
let af = AssociatedFile (Just f)
|
let af = AssociatedFile (Just f)
|
||||||
let downloader tmpfile p = do
|
let downloader tmpfile p = do
|
||||||
k <- Remote.retrieveExportWithContentIdentifier
|
k <- Remote.retrieveExportWithContentIdentifier
|
||||||
ia loc cid tmpfile
|
ia loc cid tmpfile
|
||||||
(mkkey f tmpfile)
|
(mkkey tmpfile)
|
||||||
p
|
p
|
||||||
case keyGitSha k of
|
case keyGitSha k of
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
|
@ -487,8 +520,7 @@ importKeys remote importtreeconfig importcontent importablecontents = do
|
||||||
where
|
where
|
||||||
tmpkey = importKey cid sz
|
tmpkey = importKey cid sz
|
||||||
|
|
||||||
mkkey f tmpfile = do
|
mkkey tmpfile = do
|
||||||
matcher <- largematcher (fromRawFilePath f)
|
|
||||||
let mi = MatchingFile FileInfo
|
let mi = MatchingFile FileInfo
|
||||||
{ matchFile = f
|
{ matchFile = f
|
||||||
, contentFile = Just (toRawFilePath tmpfile)
|
, contentFile = Just (toRawFilePath tmpfile)
|
||||||
|
|
|
@ -225,10 +225,13 @@ seek' o = do
|
||||||
, map (withbranch . pullRemote o mergeConfig) gitremotes
|
, map (withbranch . pullRemote o mergeConfig) gitremotes
|
||||||
, [ mergeAnnex ]
|
, [ mergeAnnex ]
|
||||||
]
|
]
|
||||||
|
|
||||||
whenM (shouldSyncContent o) $ do
|
|
||||||
mapM_ (withbranch . importRemote o mergeConfig) importremotes
|
|
||||||
|
|
||||||
|
content <- shouldSyncContent o
|
||||||
|
|
||||||
|
forM_ importremotes $
|
||||||
|
withbranch . importRemote content o mergeConfig
|
||||||
|
|
||||||
|
when content $ do
|
||||||
-- Send content to any exports before other
|
-- Send content to any exports before other
|
||||||
-- repositories, in case that lets content
|
-- repositories, in case that lets content
|
||||||
-- be dropped from other repositories.
|
-- be dropped from other repositories.
|
||||||
|
@ -454,8 +457,8 @@ pullRemote o mergeconfig remote branch = stopUnless (pure $ pullOption o && want
|
||||||
ai = ActionItemOther (Just (Remote.name remote))
|
ai = ActionItemOther (Just (Remote.name remote))
|
||||||
si = SeekInput []
|
si = SeekInput []
|
||||||
|
|
||||||
importRemote :: SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> CommandSeek
|
importRemote :: Bool -> SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> CommandSeek
|
||||||
importRemote o mergeconfig remote currbranch
|
importRemote importcontent o mergeconfig remote currbranch
|
||||||
| not (pullOption o) || not wantpull = noop
|
| not (pullOption o) || not wantpull = noop
|
||||||
| otherwise = case remoteAnnexTrackingBranch (Remote.gitconfig remote) of
|
| otherwise = case remoteAnnexTrackingBranch (Remote.gitconfig remote) of
|
||||||
Nothing -> noop
|
Nothing -> noop
|
||||||
|
@ -465,7 +468,7 @@ importRemote o mergeconfig remote currbranch
|
||||||
let subdir = if S.null p
|
let subdir = if S.null p
|
||||||
then Nothing
|
then Nothing
|
||||||
else Just (asTopFilePath p)
|
else Just (asTopFilePath p)
|
||||||
Command.Import.seekRemote remote branch subdir True
|
Command.Import.seekRemote remote branch subdir importcontent
|
||||||
void $ mergeRemote remote currbranch mergeconfig o
|
void $ mergeRemote remote currbranch mergeconfig o
|
||||||
where
|
where
|
||||||
wantpull = remoteAnnexPull (Remote.gitconfig remote)
|
wantpull = remoteAnnexPull (Remote.gitconfig remote)
|
||||||
|
|
|
@ -186,7 +186,10 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`.
|
||||||
rather than `EXPECTED`.
|
rather than `EXPECTED`.
|
||||||
* `IMPORTKEY File`
|
* `IMPORTKEY File`
|
||||||
This only needs to be implemented if IMPORTKEYSUPPORTED indicates
|
This only needs to be implemented if IMPORTKEYSUPPORTED indicates
|
||||||
it is supported.
|
it is supported. And if a remote did not support it before, adding
|
||||||
|
it will make importing the same content as before generate a likely
|
||||||
|
different tree, which can lead to merge conflicts. So be careful
|
||||||
|
implementing this.
|
||||||
Generates a key by querying the remote for eg, a checksum.
|
Generates a key by querying the remote for eg, a checksum.
|
||||||
(See [[internals/key_format]] for details of how to format a key.)
|
(See [[internals/key_format]] for details of how to format a key.)
|
||||||
Any kind of key can be generated, depending on what the remote
|
Any kind of key can be generated, depending on what the remote
|
||||||
|
|
|
@ -98,23 +98,7 @@ the tree of files on the remote, even when importing into a subdirectory.
|
||||||
With --no-content, git-annex keys are generated from information
|
With --no-content, git-annex keys are generated from information
|
||||||
provided by the special remote, without downloading it. Commands like
|
provided by the special remote, without downloading it. Commands like
|
||||||
`git-annex get` can later be used to download files, as desired.
|
`git-annex get` can later be used to download files, as desired.
|
||||||
|
The --no-content option is not supported by all special remotes.
|
||||||
The --no-content option is not supported by all special remotes,
|
|
||||||
and the kind of git-annex key that is generated is left up to
|
|
||||||
each special remote. So while the directory special remote hashes
|
|
||||||
the file and generates the same key it usually would, other
|
|
||||||
special remotes may use unusual keys like SHA1, or WORM, depending
|
|
||||||
on the limitations of the special remote.
|
|
||||||
|
|
||||||
The annex.securehashesonly configuration, if set, will prevent
|
|
||||||
--no-content importing from a special remote that uses insecure keys.
|
|
||||||
|
|
||||||
Note that a different git tree may be generated when using
|
|
||||||
--no-content than would be generated when using --content, because
|
|
||||||
the options cause different kinds of keys to be used when importing
|
|
||||||
new/changed files. So mixing uses of --content and --no-content can
|
|
||||||
lead to merge conflicts in some situations. Some special remotes,
|
|
||||||
notably the directory special remote, avoid this problem.
|
|
||||||
|
|
||||||
# IMPORTING FROM A DIRECTORY
|
# IMPORTING FROM A DIRECTORY
|
||||||
|
|
||||||
|
|
|
@ -108,12 +108,14 @@ received.
|
||||||
This behavior can be overridden by configuring the preferred content
|
This behavior can be overridden by configuring the preferred content
|
||||||
of a repository. See [[git-annex-preferred-content]](1).
|
of a repository. See [[git-annex-preferred-content]](1).
|
||||||
|
|
||||||
When `remote.<name>.annex-tracking-branch` is configured for a special remote
|
When `remote.<name>.annex-tracking-branch` is configured for a special
|
||||||
and that branch is checked out, syncing content will import changes from
|
remote and that branch is checked out, syncing with --content will
|
||||||
the remote, merge them into the branch, and export any changes that have
|
import changes from the remote, merge them into the branch, and export
|
||||||
been committed to the branch back to the remote. See
|
any changes that have been committed to the branch back to the remote.
|
||||||
See [[git-annex-import]](1) and [[git-annex-export]](1) for details about
|
With --no-content, imports will only be made from special remotes that
|
||||||
how importing and exporting work.
|
support importing without transferring files, and no exports will be done.
|
||||||
|
See [[git-annex-import]](1) and [[git-annex-export]](1) for details
|
||||||
|
about how importing and exporting work.
|
||||||
|
|
||||||
* `--content-of=path` `-C path`
|
* `--content-of=path` `-C path`
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,18 @@ git-annex to use a different backend.
|
||||||
> content, then the remote could say, don't use importKey by default.
|
> content, then the remote could say, don't use importKey by default.
|
||||||
> (Or more likely, only the directory remote will be able to support
|
> (Or more likely, only the directory remote will be able to support
|
||||||
> importKey by default..)
|
> importKey by default..)
|
||||||
|
>
|
||||||
|
> Problem: When annex.largefiles matches file content,
|
||||||
|
> cannot use importKey. So then should sync --content not use importKey
|
||||||
|
> then, risking generating a different tree? Or should it fail, even
|
||||||
|
> though importing with content is possible?
|
||||||
|
>
|
||||||
|
> > Well, different annex.largefiles settings in different clones
|
||||||
|
> > can already risk generating a different tree on import. So,
|
||||||
|
> > the former option seems preferable.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
See also, [[todo/import_--no-content_largefiles_conflict]]
|
See also, [[todo/import_--no-content_largefiles_conflict]]
|
||||||
|
|
||||||
|
> [[done]] --[[Joey]]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue