import: Check gitignores when importing trees from special remotes

It seemed best to do this, for consistency with every other way files can
get into a git-annex repo, although it is a bit strange that a local
.gitignore file affects the pseudo-commits made for the remote that is
being imported from.

This commit was sponsored by Brett Eisenberg on Patreon.
This commit is contained in:
Joey Hess 2020-09-30 10:41:59 -04:00
parent 0033e08193
commit c56efbbdb6
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 46 additions and 29 deletions

View file

@ -36,6 +36,7 @@ import Annex.Export
import Annex.RemoteTrackingBranch
import Annex.HashObject
import Annex.Transfer
import Annex.CheckIgnore
import Command
import Backend
import Types.Key
@ -623,10 +624,14 @@ makeImportMatcher r = load preferredContentKeylessTokens >>= \case
- write a git tree that contains that, git will complain and refuse to
- check it out.
-
- Filters out things not matching the FileMatcher.
- Filters out new things not matching the FileMatcher or that are
- gitignored. However, files that are already in git get imported
- regardless. (Similar to how git add behaves on gitignored files.)
- This avoids creating a remote tracking branch that, when merged,
- would delete the files.
-}
getImportableContents :: Remote -> FileMatcher Annex -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
getImportableContents r matcher =
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
getImportableContents r importtreeconfig ci matcher =
Remote.listImportableContents (Remote.importActions r) >>= \case
Nothing -> return Nothing
Just importable -> do
@ -640,23 +645,23 @@ getImportableContents r matcher =
wanted dbhandle (loc, (_cid, sz))
| ".git" `elem` Posix.splitDirectories (fromImportLocation loc) =
pure False
| otherwise = shouldImport dbhandle matcher loc sz
| otherwise = wantImport importtreeconfig ci matcher loc sz
<||> isKnownImportLocation dbhandle loc
{- If a file is not preferred content, but it was previously exported or
- imported to the remote, not importing it would result in a remote
- tracking branch that, when merged, would delete the file.
isKnownImportLocation :: Export.ExportHandle -> ImportLocation -> Annex Bool
isKnownImportLocation dbhandle loc = liftIO $
not . null <$> Export.getExportTreeKey dbhandle loc
{- The matcher is matched relative to the top of the tree of files on the
- remote, even when importing into a subdirectory.
-
- To avoid that problem, such files are included in the import.
- The next export will remove them from the remote.
- However, when checking gitignores, the subdirectory is included
- so it will look at the gitignore file in it.
-}
shouldImport :: Export.ExportHandle -> FileMatcher Annex -> ImportLocation -> ByteSize -> Annex Bool
shouldImport dbhandle matcher loc sz =
wantImport matcher loc sz
<||>
liftIO (not . null <$> Export.getExportTreeKey dbhandle loc)
wantImport :: FileMatcher Annex -> ImportLocation -> ByteSize -> Annex Bool
wantImport matcher loc sz = checkMatcher' matcher mi mempty
wantImport :: ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> ImportLocation -> ByteSize -> Annex Bool
wantImport importtreeconfig ci matcher loc sz =
checkMatcher' matcher mi mempty
<&&> (not <$> checkIgnored ci f)
where
mi = MatchingInfo $ ProvidedInfo
{ providedFilePath = fromImportLocation loc
@ -665,3 +670,8 @@ wantImport matcher loc sz = checkMatcher' matcher mi mempty
, providedMimeType = Nothing
, providedMimeEncoding = Nothing
}
f = fromRawFilePath $ case importtreeconfig of
ImportSubTree dir _ ->
getTopFilePath dir P.</> fromImportLocation loc
ImportTree ->
fromImportLocation loc

View file

@ -13,6 +13,7 @@ git-annex (8.20200909) UNRELEASED; urgency=medium
This fixes failures uploading to S3 when using -J.
* add, addurl, importfeed, import: Added --no-check-gitignore option
for finer grained control than using --force.
* import: Check gitignores when importing trees from special remotes.
* addunused: Don't check .gitignores when adding files.
* Improve the "Try making some of these repositories available"
message, with some hints for the user for what to do.

View file

@ -56,6 +56,7 @@ data ImportOptions
, importToBranch :: Branch
, importToSubDir :: Maybe FilePath
, importContent :: Bool
, checkGitIgnoreOption :: CheckGitIgnore
}
optParser :: CmdParamsDesc -> Parser ImportOptions
@ -76,6 +77,7 @@ optParser desc = do
(Ref (encodeBS' branch))
(if null subdir then Nothing else Just subdir)
content
ic
_ -> giveup "expected BRANCH[:SUBDIR]"
data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
@ -122,7 +124,7 @@ seek o@(RemoteImportOptions {}) = startConcurrency commandStages $ do
(pure Nothing)
(Just <$$> inRepo . toTopFilePath . toRawFilePath)
(importToSubDir o)
seekRemote r (importToBranch o) subdir (importContent o)
seekRemote r (importToBranch o) subdir (importContent o) (checkGitIgnoreOption o)
startLocal :: ImportOptions -> AddUnlockedMatcher -> GetFileMatcher -> DuplicateMode -> (FilePath, FilePath) -> CommandStart
startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
@ -268,8 +270,8 @@ verifyExisting key destfile (yes, no) = do
verifyEnoughCopiesToDrop [] key Nothing need [] preverified tocheck
(const yes) no
seekRemote :: Remote -> Branch -> Maybe TopFilePath -> Bool -> CommandSeek
seekRemote remote branch msubdir importcontent = do
seekRemote :: Remote -> Branch -> Maybe TopFilePath -> Bool -> CheckGitIgnore -> CommandSeek
seekRemote remote branch msubdir importcontent ci = do
importtreeconfig <- case msubdir of
Nothing -> return ImportTree
Just subdir ->
@ -286,7 +288,7 @@ seekRemote remote branch msubdir importcontent = do
let commitimport = commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig
importabletvar <- liftIO $ newTVarIO Nothing
void $ includeCommandAction (listContents remote importabletvar)
void $ includeCommandAction (listContents remote importtreeconfig ci importabletvar)
liftIO (atomically (readTVar importabletvar)) >>= \case
Nothing -> return ()
Just importable -> importKeys remote importtreeconfig importcontent importable >>= \case
@ -305,10 +307,10 @@ seekRemote remote branch msubdir importcontent = do
fromtrackingbranch a = inRepo $ a (fromRemoteTrackingBranch tb)
listContents :: Remote -> TVar (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize))) -> CommandStart
listContents remote tvar = starting "list" ai si $
listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize))) -> CommandStart
listContents remote importtreeconfig ci tvar = starting "list" ai si $
makeImportMatcher remote >>= \case
Right matcher -> getImportableContents remote matcher >>= \case
Right matcher -> getImportableContents remote importtreeconfig ci matcher >>= \case
Just importable -> next $ do
liftIO $ atomically $ writeTVar tvar (Just importable)
return True

View file

@ -67,6 +67,7 @@ import Annex.Export
import Annex.TaggedPush
import Annex.CurrentBranch
import Annex.Import (canImportKeys)
import Annex.CheckIgnore
import Types.FileMatcher
import qualified Database.Export as Export
import Utility.Bloom
@ -471,7 +472,7 @@ importRemote importcontent o mergeconfig remote currbranch
else Just (asTopFilePath p)
if canImportKeys remote importcontent
then do
Command.Import.seekRemote remote branch subdir importcontent
Command.Import.seekRemote remote branch subdir importcontent (CheckGitIgnore True)
void $ mergeRemote remote currbranch mergeconfig o
else warning $ "Cannot import from " ++ Remote.name remote ++ " when not syncing content."
where

View file

@ -72,6 +72,9 @@ to tell it what branch to track. For example:
git config remote.myremote.annex-tracking-branch master
git annex sync --content
Any files that are gitignored will not be included in the import,
but will be left on the remote.
When the special remote has a preferred content expression set by
[[git-annex-wanted]](1), it will be honored when importing from it.
Files that are not preferred content of the remote will not be
@ -166,10 +169,6 @@ and `--reinject-duplicates` documentation below.
Also, causes .gitignore to not take effect when adding files.
* `--no-check-gitignore`
Add gitignored files.
* file matching options
Many of the [[git-annex-matching-options]](1)
@ -186,6 +185,10 @@ and `--reinject-duplicates` documentation below.
Setting this to "cpus" will run one job per CPU core.
* `--no-check-gitignore`
Add gitignored files.
* `--json`
Enable JSON output. This is intended to be parsed by programs that use