2012-05-31 23:47:18 +00:00
|
|
|
{- git-annex command
|
|
|
|
-
|
2021-09-02 17:45:21 +00:00
|
|
|
- Copyright 2012-2021 Joey Hess <id@joeyh.name>
|
2012-05-31 23:47:18 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2012-05-31 23:47:18 +00:00
|
|
|
-}
|
|
|
|
|
2023-04-10 16:56:45 +00:00
|
|
|
{-# LANGUAGE ApplicativeDo, OverloadedStrings #-}
|
2019-02-26 16:06:19 +00:00
|
|
|
|
2012-05-31 23:47:18 +00:00
|
|
|
module Command.Import where
|
|
|
|
|
|
|
|
import Command
|
2015-05-11 16:57:47 +00:00
|
|
|
import qualified Git
|
2012-05-31 23:47:18 +00:00
|
|
|
import qualified Annex
|
|
|
|
import qualified Command.Add
|
2017-02-09 19:40:44 +00:00
|
|
|
import qualified Command.Reinject
|
2019-02-26 16:06:19 +00:00
|
|
|
import qualified Types.Remote as Remote
|
2019-02-26 17:11:25 +00:00
|
|
|
import qualified Git.Ref
|
2013-08-11 18:31:54 +00:00
|
|
|
import Utility.CopyFile
|
2020-07-03 17:41:57 +00:00
|
|
|
import Utility.OptParse
|
2013-08-20 15:00:52 +00:00
|
|
|
import Backend
|
|
|
|
import Types.KeySource
|
2015-04-29 17:46:12 +00:00
|
|
|
import Annex.CheckIgnore
|
2015-04-30 18:03:24 +00:00
|
|
|
import Annex.NumCopies
|
2015-12-02 18:48:42 +00:00
|
|
|
import Annex.FileMatcher
|
2017-02-09 19:32:22 +00:00
|
|
|
import Annex.Ingest
|
|
|
|
import Annex.InodeSentinal
|
2019-02-26 16:06:19 +00:00
|
|
|
import Annex.Import
|
2020-03-06 15:57:15 +00:00
|
|
|
import Annex.Perms
|
2019-03-01 18:44:22 +00:00
|
|
|
import Annex.RemoteTrackingBranch
|
2017-02-09 19:32:22 +00:00
|
|
|
import Utility.InodeCache
|
2017-02-07 21:35:51 +00:00
|
|
|
import Logs.Location
|
2019-02-26 16:06:19 +00:00
|
|
|
import Git.FilePath
|
|
|
|
import Git.Types
|
2019-02-26 17:11:25 +00:00
|
|
|
import Types.Import
|
2019-06-25 15:37:52 +00:00
|
|
|
import Utility.Metered
|
2020-11-04 18:20:37 +00:00
|
|
|
import qualified Utility.RawFilePath as R
|
2012-05-31 23:47:18 +00:00
|
|
|
|
2019-04-10 21:02:56 +00:00
|
|
|
import Control.Concurrent.STM
|
2023-03-01 19:55:58 +00:00
|
|
|
import System.PosixCompat.Files (isDirectory, isSymbolicLink, isRegularFile)
|
2019-04-10 21:02:56 +00:00
|
|
|
|
2015-07-08 16:33:27 +00:00
|
|
|
cmd :: Command
|
2018-02-19 18:28:17 +00:00
|
|
|
cmd = notBareRepo $
|
2022-06-29 17:28:08 +00:00
|
|
|
withAnnexOptions opts $
|
2018-02-19 18:28:17 +00:00
|
|
|
command "import" SectionCommon
|
2019-11-19 17:26:27 +00:00
|
|
|
"add a tree of files to the repository"
|
2021-11-04 18:33:07 +00:00
|
|
|
(paramPaths ++ "|BRANCH")
|
2019-03-06 17:10:29 +00:00
|
|
|
(seek <$$> optParser)
|
2020-10-19 19:36:18 +00:00
|
|
|
where
|
|
|
|
opts =
|
2022-06-29 17:28:08 +00:00
|
|
|
[ backendOption
|
|
|
|
, jobsOption
|
2020-10-19 19:36:18 +00:00
|
|
|
, jsonOptions
|
|
|
|
, jsonProgressOption
|
|
|
|
-- These options are only used when importing from a
|
|
|
|
-- directory, not from a special remote. So it's ok
|
|
|
|
-- to use LimitDiskFiles.
|
|
|
|
, fileMatchingOptions LimitDiskFiles
|
|
|
|
]
|
2013-08-11 18:31:54 +00:00
|
|
|
|
2019-02-26 16:06:19 +00:00
|
|
|
data ImportOptions
|
|
|
|
= LocalImportOptions
|
|
|
|
{ importFiles :: CmdParams
|
|
|
|
, duplicateMode :: DuplicateMode
|
Added --no-check-gitignore option for finer grained control than using --force.
add, addurl, importfeed, import: Added --no-check-gitignore option
for finer grained control than using --force.
(--force is used for too many different things, and at least one
of these also uses it for something else. I would like to reduce
--force's footprint until it only forces drops or a few other data
losses. For now, --force still disables checking ignores too.)
addunused: Don't check .gitignores when adding files. This is a behavior
change, but I justify it by analogy with git add of a gitignored file
adding it, asking to add all unused files back should add them all back,
not skip some. The old behavior was surprising.
In Command.Lock and Command.ReKey, CheckGitIgnore False does not change
behavior, it only makes explicit what is done. Since these commands are run
on annexed files, the file is already checked into git, so git add won't
check ignores.
2020-09-18 17:12:04 +00:00
|
|
|
, checkGitIgnoreOption :: CheckGitIgnore
|
2019-02-26 16:06:19 +00:00
|
|
|
}
|
|
|
|
| RemoteImportOptions
|
|
|
|
{ importFromRemote :: DeferredParse Remote
|
|
|
|
, importToBranch :: Branch
|
|
|
|
, importToSubDir :: Maybe FilePath
|
2020-07-03 17:41:57 +00:00
|
|
|
, importContent :: Bool
|
2020-09-30 14:41:59 +00:00
|
|
|
, checkGitIgnoreOption :: CheckGitIgnore
|
2019-02-26 16:06:19 +00:00
|
|
|
}
|
2013-12-04 17:13:30 +00:00
|
|
|
|
2015-07-13 15:15:21 +00:00
|
|
|
optParser :: CmdParamsDesc -> Parser ImportOptions
|
2019-03-06 17:10:29 +00:00
|
|
|
optParser desc = do
|
|
|
|
ps <- cmdParams desc
|
2023-04-05 19:46:51 +00:00
|
|
|
mfromremote <- optional $ mkParseRemoteOption <$> parseFromOption
|
2020-07-03 17:41:57 +00:00
|
|
|
content <- invertableSwitch "content" True
|
|
|
|
( help "do not get contents of imported files"
|
|
|
|
)
|
2019-03-06 17:10:29 +00:00
|
|
|
dupmode <- fromMaybe Default <$> optional duplicateModeParser
|
Added --no-check-gitignore option for finer grained control than using --force.
add, addurl, importfeed, import: Added --no-check-gitignore option
for finer grained control than using --force.
(--force is used for too many different things, and at least one
of these also uses it for something else. I would like to reduce
--force's footprint until it only forces drops or a few other data
losses. For now, --force still disables checking ignores too.)
addunused: Don't check .gitignores when adding files. This is a behavior
change, but I justify it by analogy with git add of a gitignored file
adding it, asking to add all unused files back should add them all back,
not skip some. The old behavior was surprising.
In Command.Lock and Command.ReKey, CheckGitIgnore False does not change
behavior, it only makes explicit what is done. Since these commands are run
on annexed files, the file is already checked into git, so git add won't
check ignores.
2020-09-18 17:12:04 +00:00
|
|
|
ic <- Command.Add.checkGitIgnoreSwitch
|
2019-03-06 17:10:29 +00:00
|
|
|
return $ case mfromremote of
|
Added --no-check-gitignore option for finer grained control than using --force.
add, addurl, importfeed, import: Added --no-check-gitignore option
for finer grained control than using --force.
(--force is used for too many different things, and at least one
of these also uses it for something else. I would like to reduce
--force's footprint until it only forces drops or a few other data
losses. For now, --force still disables checking ignores too.)
addunused: Don't check .gitignores when adding files. This is a behavior
change, but I justify it by analogy with git add of a gitignored file
adding it, asking to add all unused files back should add them all back,
not skip some. The old behavior was surprising.
In Command.Lock and Command.ReKey, CheckGitIgnore False does not change
behavior, it only makes explicit what is done. Since these commands are run
on annexed files, the file is already checked into git, so git add won't
check ignores.
2020-09-18 17:12:04 +00:00
|
|
|
Nothing -> LocalImportOptions ps dupmode ic
|
2019-03-06 17:10:29 +00:00
|
|
|
Just r -> case ps of
|
|
|
|
[bs] ->
|
|
|
|
let (branch, subdir) = separate (== ':') bs
|
|
|
|
in RemoteImportOptions r
|
2021-08-11 00:45:02 +00:00
|
|
|
(Ref (encodeBS branch))
|
2019-03-06 17:10:29 +00:00
|
|
|
(if null subdir then Nothing else Just subdir)
|
2020-07-03 17:41:57 +00:00
|
|
|
content
|
2020-09-30 14:41:59 +00:00
|
|
|
ic
|
2019-03-06 17:10:29 +00:00
|
|
|
_ -> giveup "expected BRANCH[:SUBDIR]"
|
2019-02-26 16:06:19 +00:00
|
|
|
|
|
|
|
data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
|
|
|
|
deriving (Eq)
|
2013-08-11 18:31:54 +00:00
|
|
|
|
2015-07-13 15:15:21 +00:00
|
|
|
duplicateModeParser :: Parser DuplicateMode
|
|
|
|
duplicateModeParser =
|
|
|
|
flag' Duplicate
|
|
|
|
( long "duplicate"
|
|
|
|
<> help "do not delete source files"
|
|
|
|
)
|
|
|
|
<|> flag' DeDuplicate
|
|
|
|
( long "deduplicate"
|
|
|
|
<> help "delete source files whose content was imported before"
|
|
|
|
)
|
|
|
|
<|> flag' CleanDuplicates
|
|
|
|
( long "clean-duplicates"
|
|
|
|
<> help "delete duplicate source files (import nothing)"
|
|
|
|
)
|
|
|
|
<|> flag' SkipDuplicates
|
|
|
|
( long "skip-duplicates"
|
2017-02-09 19:40:44 +00:00
|
|
|
<> help "import only new files (do not delete source files)"
|
|
|
|
)
|
|
|
|
<|> flag' ReinjectDuplicates
|
|
|
|
( long "reinject-duplicates"
|
|
|
|
<> help "import new files, and reinject the content of files that were imported before"
|
2015-07-13 15:15:21 +00:00
|
|
|
)
|
2013-08-11 18:31:54 +00:00
|
|
|
|
2015-07-13 15:15:21 +00:00
|
|
|
seek :: ImportOptions -> CommandSeek
|
2019-06-19 16:35:08 +00:00
|
|
|
seek o@(LocalImportOptions {}) = startConcurrency commandStages $ do
|
2020-11-04 18:20:37 +00:00
|
|
|
repopath <- liftIO . absPath =<< fromRepo Git.repoPath
|
|
|
|
inrepops <- liftIO $ filter (dirContains repopath)
|
|
|
|
<$> mapM (absPath . toRawFilePath) (importFiles o)
|
2015-05-11 16:57:47 +00:00
|
|
|
unless (null inrepops) $ do
|
2023-04-10 16:56:45 +00:00
|
|
|
qp <- coreQuotePath <$> Annex.getGitConfig
|
|
|
|
giveup $ decodeBS $ quote qp $
|
|
|
|
"cannot import files from inside the working tree (use git annex add instead): "
|
|
|
|
<> quotedPaths inrepops
|
2015-12-02 18:48:42 +00:00
|
|
|
largematcher <- largeFilesMatcher
|
2019-12-20 19:01:34 +00:00
|
|
|
addunlockedmatcher <- addUnlockedMatcher
|
Added --no-check-gitignore option for finer grained control than using --force.
add, addurl, importfeed, import: Added --no-check-gitignore option
for finer grained control than using --force.
(--force is used for too many different things, and at least one
of these also uses it for something else. I would like to reduce
--force's footprint until it only forces drops or a few other data
losses. For now, --force still disables checking ignores too.)
addunused: Don't check .gitignores when adding files. This is a behavior
change, but I justify it by analogy with git add of a gitignored file
adding it, asking to add all unused files back should add them all back,
not skip some. The old behavior was surprising.
In Command.Lock and Command.ReKey, CheckGitIgnore False does not change
behavior, it only makes explicit what is done. Since these commands are run
on annexed files, the file is already checked into git, so git add won't
check ignores.
2020-09-18 17:12:04 +00:00
|
|
|
(commandAction . startLocal o addunlockedmatcher largematcher (duplicateMode o))
|
2018-10-01 18:12:06 +00:00
|
|
|
`withPathContents` importFiles o
|
2019-06-19 16:35:08 +00:00
|
|
|
seek o@(RemoteImportOptions {}) = startConcurrency commandStages $ do
|
2019-02-26 16:06:19 +00:00
|
|
|
r <- getParsed (importFromRemote o)
|
2019-03-09 17:57:49 +00:00
|
|
|
unlessM (Remote.isImportSupported r) $
|
|
|
|
giveup "That remote does not support imports."
|
2019-02-26 16:06:19 +00:00
|
|
|
subdir <- maybe
|
|
|
|
(pure Nothing)
|
2019-12-09 17:49:05 +00:00
|
|
|
(Just <$$> inRepo . toTopFilePath . toRawFilePath)
|
2019-02-26 16:06:19 +00:00
|
|
|
(importToSubDir o)
|
2020-09-30 14:41:59 +00:00
|
|
|
seekRemote r (importToBranch o) subdir (importContent o) (checkGitIgnoreOption o)
|
2012-05-31 23:47:18 +00:00
|
|
|
|
2020-11-04 18:20:37 +00:00
|
|
|
startLocal :: ImportOptions -> AddUnlockedMatcher -> GetFileMatcher -> DuplicateMode -> (RawFilePath, RawFilePath) -> CommandStart
|
Added --no-check-gitignore option for finer grained control than using --force.
add, addurl, importfeed, import: Added --no-check-gitignore option
for finer grained control than using --force.
(--force is used for too many different things, and at least one
of these also uses it for something else. I would like to reduce
--force's footprint until it only forces drops or a few other data
losses. For now, --force still disables checking ignores too.)
addunused: Don't check .gitignores when adding files. This is a behavior
change, but I justify it by analogy with git add of a gitignored file
adding it, asking to add all unused files back should add them all back,
not skip some. The old behavior was surprising.
In Command.Lock and Command.ReKey, CheckGitIgnore False does not change
behavior, it only makes explicit what is done. Since these commands are run
on annexed files, the file is already checked into git, so git add won't
check ignores.
2020-09-18 17:12:04 +00:00
|
|
|
startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
|
2020-11-04 18:20:37 +00:00
|
|
|
ifM (liftIO $ isRegularFile <$> R.getSymbolicLinkStatus srcfile)
|
2020-09-14 20:49:33 +00:00
|
|
|
( starting "import" ai si pickaction
|
2012-05-31 23:47:18 +00:00
|
|
|
, stop
|
|
|
|
)
|
2013-08-20 15:00:52 +00:00
|
|
|
where
|
2021-03-12 18:09:19 +00:00
|
|
|
ai = ActionItemTreeFile destfile
|
2020-09-14 20:49:33 +00:00
|
|
|
si = SeekInput []
|
|
|
|
|
2015-03-31 19:36:02 +00:00
|
|
|
deletedup k = do
|
2023-04-10 21:03:41 +00:00
|
|
|
showNote $ UnquotedString $ "duplicate of " ++ serializeKey k
|
2015-10-09 15:09:46 +00:00
|
|
|
verifyExisting k destfile
|
2015-04-30 18:03:24 +00:00
|
|
|
( do
|
2020-11-04 18:20:37 +00:00
|
|
|
liftIO $ R.removeLink srcfile
|
2015-04-30 18:03:24 +00:00
|
|
|
next $ return True
|
|
|
|
, do
|
2015-04-30 18:10:28 +00:00
|
|
|
warning "Could not verify that the content is still present in the annex; not removing from the import location."
|
2015-04-30 18:03:24 +00:00
|
|
|
stop
|
|
|
|
)
|
2017-02-09 19:40:44 +00:00
|
|
|
reinject k = do
|
|
|
|
showNote "reinjecting"
|
|
|
|
Command.Reinject.perform srcfile k
|
2017-02-09 19:32:22 +00:00
|
|
|
importfile ld k = checkdestdir $ do
|
Added --no-check-gitignore option for finer grained control than using --force.
add, addurl, importfeed, import: Added --no-check-gitignore option
for finer grained control than using --force.
(--force is used for too many different things, and at least one
of these also uses it for something else. I would like to reduce
--force's footprint until it only forces drops or a few other data
losses. For now, --force still disables checking ignores too.)
addunused: Don't check .gitignores when adding files. This is a behavior
change, but I justify it by analogy with git add of a gitignored file
adding it, asking to add all unused files back should add them all back,
not skip some. The old behavior was surprising.
In Command.Lock and Command.ReKey, CheckGitIgnore False does not change
behavior, it only makes explicit what is done. Since these commands are run
on annexed files, the file is already checked into git, so git add won't
check ignores.
2020-09-18 17:12:04 +00:00
|
|
|
ignored <- checkIgnored (checkGitIgnoreOption o) destfile
|
2015-04-29 17:46:12 +00:00
|
|
|
if ignored
|
2015-04-29 17:56:41 +00:00
|
|
|
then do
|
filter out control characters in warning messages
Converted warning and similar to use StringContainingQuotedPath. Most
warnings are static strings, some do refer to filepaths that need to be
quoted, and others don't need quoting.
Note that, since quote filters out control characters of even
UnquotedString, this makes all warnings safe, even when an attacker
sneaks in a control character in some other way.
When json is being output, no quoting is done, since json gets its own
quoting.
This does, as a side effect, make warning messages in json output not
be indented. The indentation is only needed to offset warning messages
underneath the display of the file they apply to, so that's ok.
Sponsored-by: Brett Eisenberg on Patreon
2023-04-10 18:47:32 +00:00
|
|
|
warning $ "not importing " <> QuotedPath destfile <> " which is .gitignored (use --no-check-gitignore to override)"
|
2015-04-29 17:56:41 +00:00
|
|
|
stop
|
2015-04-29 17:46:12 +00:00
|
|
|
else do
|
2020-11-04 18:20:37 +00:00
|
|
|
existing <- liftIO (catchMaybeIO $ R.getSymbolicLinkStatus destfile)
|
2015-04-29 17:56:41 +00:00
|
|
|
case existing of
|
2017-02-09 19:32:22 +00:00
|
|
|
Nothing -> importfilechecked ld k
|
2015-11-05 22:45:52 +00:00
|
|
|
Just s
|
2015-04-29 17:56:41 +00:00
|
|
|
| isDirectory s -> notoverwriting "(is a directory)"
|
2022-06-28 19:28:14 +00:00
|
|
|
| isSymbolicLink s -> ifM (Annex.getRead Annex.force)
|
2019-03-18 20:40:15 +00:00
|
|
|
( do
|
2020-11-04 18:20:37 +00:00
|
|
|
liftIO $ removeWhenExistsWith R.removeLink destfile
|
2019-03-18 20:40:15 +00:00
|
|
|
importfilechecked ld k
|
|
|
|
, notoverwriting "(is a symlink)"
|
|
|
|
)
|
2022-06-28 19:28:14 +00:00
|
|
|
| otherwise -> ifM (Annex.getRead Annex.force)
|
2015-04-29 17:56:41 +00:00
|
|
|
( do
|
2020-11-04 18:20:37 +00:00
|
|
|
liftIO $ removeWhenExistsWith R.removeLink destfile
|
2017-02-09 19:32:22 +00:00
|
|
|
importfilechecked ld k
|
2015-04-29 17:56:41 +00:00
|
|
|
, notoverwriting "(use --force to override, or a duplication option such as --deduplicate to clean up)"
|
|
|
|
)
|
2015-11-05 22:45:52 +00:00
|
|
|
checkdestdir cont = do
|
|
|
|
let destdir = parentDir destfile
|
2020-11-04 18:20:37 +00:00
|
|
|
existing <- liftIO (catchMaybeIO $ R.getSymbolicLinkStatus destdir)
|
2015-11-05 22:45:52 +00:00
|
|
|
case existing of
|
|
|
|
Nothing -> cont
|
|
|
|
Just s
|
|
|
|
| isDirectory s -> cont
|
|
|
|
| otherwise -> do
|
filter out control characters in warning messages
Converted warning and similar to use StringContainingQuotedPath. Most
warnings are static strings, some do refer to filepaths that need to be
quoted, and others don't need quoting.
Note that, since quote filters out control characters of even
UnquotedString, this makes all warnings safe, even when an attacker
sneaks in a control character in some other way.
When json is being output, no quoting is done, since json gets its own
quoting.
This does, as a side effect, make warning messages in json output not
be indented. The indentation is only needed to offset warning messages
underneath the display of the file they apply to, so that's ok.
Sponsored-by: Brett Eisenberg on Patreon
2023-04-10 18:47:32 +00:00
|
|
|
warning $ "not importing " <> QuotedPath destfile <> " because " <> QuotedPath destdir <> " is not a directory"
|
2015-11-05 22:45:52 +00:00
|
|
|
stop
|
|
|
|
|
2017-02-09 19:32:22 +00:00
|
|
|
importfilechecked ld k = do
|
|
|
|
-- Move or copy the src file to the dest file.
|
|
|
|
-- The dest file is what will be ingested.
|
2020-03-06 15:57:15 +00:00
|
|
|
createWorkTreeDirectory (parentDir destfile)
|
2021-09-02 17:45:21 +00:00
|
|
|
unwind <- liftIO $ if mode == Duplicate || mode == SkipDuplicates
|
|
|
|
then do
|
|
|
|
void $ copyFileExternal CopyAllMetaData
|
|
|
|
(fromRawFilePath srcfile)
|
|
|
|
(fromRawFilePath destfile)
|
|
|
|
return $ removeWhenExistsWith R.removeLink destfile
|
|
|
|
else do
|
2022-06-22 20:47:34 +00:00
|
|
|
moveFile srcfile destfile
|
|
|
|
return $ moveFile destfile srcfile
|
2021-09-02 17:45:21 +00:00
|
|
|
-- Make sure that the dest file has its write permissions
|
|
|
|
-- removed; the src file normally already did, but may
|
|
|
|
-- have imported it from a filesystem that does not allow
|
|
|
|
-- removing write permissions, to a repo on a filesystem
|
|
|
|
-- that does.
|
|
|
|
when (lockingFile (lockDownConfig ld)) $ do
|
|
|
|
freezeContent destfile
|
|
|
|
checkLockedDownWritePerms destfile srcfile >>= \case
|
|
|
|
Just err -> do
|
|
|
|
liftIO unwind
|
2023-04-10 16:56:45 +00:00
|
|
|
qp <- coreQuotePath <$> Annex.getGitConfig
|
|
|
|
giveup (decodeBS $ quote qp err)
|
2021-09-02 17:45:21 +00:00
|
|
|
Nothing -> noop
|
2017-02-09 19:32:22 +00:00
|
|
|
-- Get the inode cache of the dest file. It should be
|
2020-03-06 15:57:15 +00:00
|
|
|
-- weakly the same as the originally locked down file's
|
2017-02-09 19:32:22 +00:00
|
|
|
-- inode cache. (Since the file may have been copied,
|
|
|
|
-- its inodes may not be the same.)
|
2022-06-14 20:38:34 +00:00
|
|
|
s <- liftIO $ R.getSymbolicLinkStatus destfile
|
|
|
|
newcache <- withTSDelta $ \d -> liftIO $ toInodeCache d destfile s
|
2017-02-09 19:32:22 +00:00
|
|
|
let unchanged = case (newcache, inodeCache (keySource ld)) of
|
|
|
|
(_, Nothing) -> True
|
|
|
|
(Just newc, Just c) | compareWeak c newc -> True
|
|
|
|
_ -> False
|
|
|
|
unless unchanged $
|
|
|
|
giveup "changed while it was being added"
|
|
|
|
-- The LockedDown needs to be adjusted, since the destfile
|
|
|
|
-- is what will be ingested.
|
|
|
|
let ld' = ld
|
|
|
|
{ keySource = KeySource
|
2020-11-04 18:20:37 +00:00
|
|
|
{ keyFilename = destfile
|
|
|
|
, contentLocation = destfile
|
2017-02-09 19:32:22 +00:00
|
|
|
, inodeCache = newcache
|
|
|
|
}
|
|
|
|
}
|
2015-12-02 18:48:42 +00:00
|
|
|
ifM (checkFileMatcher largematcher destfile)
|
fix add overwrite race with git-annex add to annex
This is not a complete fix for all such races, only the one where a
large file gets changed while adding and gets added to git rather than
to the annex.
addLink needs to go away, any caller of it is probably subject to the
same kind of race. (Also, addLink itself fails to check gitignore when
symlinks are not supported.)
ingestAdd no longer checks gitignore. (It didn't check it consistently
before either, since there were cases where it did not run git add!)
When git-annex import calls it, it's already checked gitignore itself
earlier. When git-annex add calls it, it's usually on files found
by withFilesNotInGit, which handles checking ignores.
There was one other case, when git-annex add --batch calls it. In that
case, old git-annex behaved rather badly, it would seem to add the file,
but git add would later fail, leaving the file as an unstaged annex symlink.
That behavior has also been fixed.
Sponsored-by: Brett Eisenberg on Patreon
2022-06-14 17:20:42 +00:00
|
|
|
( ingestAdd' nullMeterUpdate (Just ld') (Just k)
|
2017-02-09 19:32:22 +00:00
|
|
|
>>= maybe
|
|
|
|
stop
|
|
|
|
(\addedk -> next $ Command.Add.cleanup addedk True)
|
2022-08-03 15:16:04 +00:00
|
|
|
, Command.Add.addSmall (DryRun False) destfile s
|
2015-12-02 18:48:42 +00:00
|
|
|
)
|
2015-04-29 17:56:41 +00:00
|
|
|
notoverwriting why = do
|
filter out control characters in warning messages
Converted warning and similar to use StringContainingQuotedPath. Most
warnings are static strings, some do refer to filepaths that need to be
quoted, and others don't need quoting.
Note that, since quote filters out control characters of even
UnquotedString, this makes all warnings safe, even when an attacker
sneaks in a control character in some other way.
When json is being output, no quoting is done, since json gets its own
quoting.
This does, as a side effect, make warning messages in json output not
be indented. The indentation is only needed to offset warning messages
underneath the display of the file they apply to, so that's ok.
Sponsored-by: Brett Eisenberg on Patreon
2023-04-10 18:47:32 +00:00
|
|
|
warning $ "not overwriting existing " <> QuotedPath destfile <> " " <> UnquotedString why
|
2015-04-29 17:56:41 +00:00
|
|
|
stop
|
2017-02-09 19:32:22 +00:00
|
|
|
lockdown a = do
|
2019-12-20 19:01:34 +00:00
|
|
|
let mi = MatchingFile $ FileInfo
|
2021-03-01 20:34:40 +00:00
|
|
|
{ contentFile = srcfile
|
2020-11-04 18:20:37 +00:00
|
|
|
, matchFile = destfile
|
2020-12-14 21:42:02 +00:00
|
|
|
, matchKey = Nothing
|
2019-12-20 19:01:34 +00:00
|
|
|
}
|
2021-01-25 17:55:01 +00:00
|
|
|
lockingfile <- not <$> addUnlocked addunlockedmatcher mi True
|
2017-02-09 19:32:22 +00:00
|
|
|
-- Minimal lock down with no hard linking so nothing
|
|
|
|
-- has to be done to clean up from it.
|
|
|
|
let cfg = LockDownConfig
|
|
|
|
{ lockingFile = lockingfile
|
2019-05-07 17:04:39 +00:00
|
|
|
, hardlinkFileTmpDir = Nothing
|
2021-09-02 17:45:21 +00:00
|
|
|
-- The write perms of the file may not be able to be
|
|
|
|
-- removed, if it's being imported from a crippled
|
|
|
|
-- filesystem. So lockDown is asked to not check
|
|
|
|
-- the write perms. They will be checked later, after
|
|
|
|
-- the file gets copied into the repository.
|
|
|
|
, checkWritePerms = False
|
2017-02-09 19:32:22 +00:00
|
|
|
}
|
2020-11-04 18:20:37 +00:00
|
|
|
v <- lockDown cfg (fromRawFilePath srcfile)
|
2015-03-31 19:36:02 +00:00
|
|
|
case v of
|
2017-02-09 19:32:22 +00:00
|
|
|
Just ld -> do
|
|
|
|
backend <- chooseBackend destfile
|
2020-05-15 16:51:09 +00:00
|
|
|
k <- fst <$> genKey (keySource ld) nullMeterUpdate backend
|
|
|
|
a (ld, k)
|
2017-02-09 19:32:22 +00:00
|
|
|
Nothing -> stop
|
|
|
|
checkdup k dupa notdupa = ifM (isKnownKey k)
|
|
|
|
( dupa
|
|
|
|
, notdupa
|
|
|
|
)
|
|
|
|
pickaction = lockdown $ \(ld, k) -> case mode of
|
|
|
|
DeDuplicate -> checkdup k (deletedup k) (importfile ld k)
|
|
|
|
CleanDuplicates -> checkdup k
|
|
|
|
(deletedup k)
|
|
|
|
(skipbecause "not duplicate")
|
|
|
|
SkipDuplicates -> checkdup k
|
|
|
|
(skipbecause "duplicate")
|
|
|
|
(importfile ld k)
|
2017-02-09 19:40:44 +00:00
|
|
|
ReinjectDuplicates -> checkdup k
|
|
|
|
(reinject k)
|
|
|
|
(importfile ld k)
|
2017-02-09 19:32:22 +00:00
|
|
|
_ -> importfile ld k
|
2023-04-10 21:03:41 +00:00
|
|
|
skipbecause s = do
|
|
|
|
showNote (s <> "; skipping")
|
|
|
|
next (return True)
|
2015-04-30 18:03:24 +00:00
|
|
|
|
2020-11-04 18:20:37 +00:00
|
|
|
verifyExisting :: Key -> RawFilePath -> (CommandPerform, CommandPerform) -> CommandPerform
|
2015-10-09 15:09:46 +00:00
|
|
|
verifyExisting key destfile (yes, no) = do
|
2015-04-30 18:03:24 +00:00
|
|
|
-- Look up the numcopies setting for the file that it would be
|
|
|
|
-- imported to, if it were imported.
|
2021-01-06 18:11:08 +00:00
|
|
|
(needcopies, mincopies) <- getFileNumMinCopies destfile
|
2015-04-30 18:03:24 +00:00
|
|
|
|
2015-10-09 18:57:32 +00:00
|
|
|
(tocheck, preverified) <- verifiableCopies key []
|
2021-01-06 18:11:08 +00:00
|
|
|
verifyEnoughCopiesToDrop [] key Nothing needcopies mincopies [] preverified tocheck
|
2015-10-09 15:09:46 +00:00
|
|
|
(const yes) no
|
2019-02-26 16:06:19 +00:00
|
|
|
|
2020-09-30 14:41:59 +00:00
|
|
|
seekRemote :: Remote -> Branch -> Maybe TopFilePath -> Bool -> CheckGitIgnore -> CommandSeek
|
|
|
|
seekRemote remote branch msubdir importcontent ci = do
|
2019-02-26 17:11:25 +00:00
|
|
|
importtreeconfig <- case msubdir of
|
|
|
|
Nothing -> return ImportTree
|
2019-03-01 18:32:45 +00:00
|
|
|
Just subdir ->
|
|
|
|
let mk tree = pure $ ImportSubTree subdir tree
|
|
|
|
in fromtrackingbranch Git.Ref.tree >>= \case
|
|
|
|
Just tree -> mk tree
|
|
|
|
Nothing -> inRepo (Git.Ref.tree branch) >>= \case
|
|
|
|
Just tree -> mk tree
|
|
|
|
Nothing -> giveup $ "Unable to find base tree for branch " ++ fromRef branch
|
|
|
|
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
trackingcommit <- fromtrackingbranch Git.Ref.sha
|
2019-11-11 22:20:35 +00:00
|
|
|
cmode <- annexCommitMode <$> Annex.getGitConfig
|
2019-11-11 20:15:05 +00:00
|
|
|
let importcommitconfig = ImportCommitConfig trackingcommit cmode importmessage
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
let commitimport = commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig
|
2019-02-26 18:22:08 +00:00
|
|
|
|
2019-04-10 21:02:56 +00:00
|
|
|
importabletvar <- liftIO $ newTVarIO Nothing
|
2020-09-30 14:41:59 +00:00
|
|
|
void $ includeCommandAction (listContents remote importtreeconfig ci importabletvar)
|
2019-04-10 21:02:56 +00:00
|
|
|
liftIO (atomically (readTVar importabletvar)) >>= \case
|
|
|
|
Nothing -> return ()
|
2023-06-01 17:46:16 +00:00
|
|
|
Just importable -> importChanges remote importtreeconfig importcontent False importable >>= \case
|
2023-05-31 19:45:23 +00:00
|
|
|
ImportUnfinished -> warning $ UnquotedString $ concat
|
2019-04-10 21:02:56 +00:00
|
|
|
[ "Failed to import some files from "
|
|
|
|
, Remote.name remote
|
|
|
|
, ". Re-run command to resume import."
|
|
|
|
]
|
2023-05-31 19:45:23 +00:00
|
|
|
ImportFinished imported -> void $
|
finish CommandStart transition
The hoped for optimisation of CommandStart with -J did not materialize.
In fact, not runnign CommandStart in parallel is slower than -J3.
So, CommandStart are still run in parallel.
(The actual bad performance I've been seeing with -J in my big repo
has to do with building the remoteList.)
But, this is still progress toward making -J faster, because it gets rid
of the onlyActionOn roadblock in the way of making CommandCleanup jobs
run separate from CommandPerform jobs.
Added OnlyActionOn constructor for ActionItem which fixes the
onlyActionOn breakage in the last commit.
Made CustomOutput include an ActionItem, so even things using it can
specify OnlyActionOn.
In Command.Move and Command.Sync, there were CommandStarts that used
includeCommandAction, so output messages, which is no longer allowed.
Fixed by using startingCustomOutput, but that's still not quite right,
since it prevents message display for the includeCommandAction run
inside it too.
2019-06-12 13:23:26 +00:00
|
|
|
includeCommandAction $
|
|
|
|
commitimport imported
|
2019-02-26 17:11:25 +00:00
|
|
|
where
|
|
|
|
importmessage = "import from " ++ Remote.name remote
|
2019-03-01 18:32:45 +00:00
|
|
|
|
2019-02-26 17:11:25 +00:00
|
|
|
tb = mkRemoteTrackingBranch remote branch
|
2019-03-01 18:32:45 +00:00
|
|
|
|
|
|
|
fromtrackingbranch a = inRepo $ a (fromRemoteTrackingBranch tb)
|
2019-02-26 18:22:08 +00:00
|
|
|
|
2021-10-06 21:05:32 +00:00
|
|
|
listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, Remote.ByteSize))) -> CommandStart
|
2020-09-30 14:41:59 +00:00
|
|
|
listContents remote importtreeconfig ci tvar = starting "list" ai si $
|
2020-12-21 20:03:27 +00:00
|
|
|
listContents' remote importtreeconfig ci $ \importable -> do
|
2020-12-22 18:35:02 +00:00
|
|
|
liftIO $ atomically $ writeTVar tvar importable
|
2020-12-21 20:03:27 +00:00
|
|
|
next $ return True
|
|
|
|
where
|
2023-04-08 19:48:32 +00:00
|
|
|
ai = ActionItemOther (Just (UnquotedString (Remote.name remote)))
|
2020-12-21 20:03:27 +00:00
|
|
|
si = SeekInput []
|
|
|
|
|
2021-10-06 21:05:32 +00:00
|
|
|
listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a
|
2020-12-21 20:03:27 +00:00
|
|
|
listContents' remote importtreeconfig ci a =
|
2020-09-30 14:10:03 +00:00
|
|
|
makeImportMatcher remote >>= \case
|
2020-12-22 18:20:11 +00:00
|
|
|
Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case
|
|
|
|
Right importable -> a importable
|
|
|
|
Left e -> giveup $ "Unable to list contents of " ++ Remote.name remote ++ ": " ++ show e
|
2020-09-30 14:10:03 +00:00
|
|
|
Left err -> giveup $ unwords
|
|
|
|
[ "Cannot import from"
|
|
|
|
, Remote.name remote
|
|
|
|
, "because of a problem with its configuration:"
|
|
|
|
, err
|
|
|
|
]
|
2019-02-26 19:25:28 +00:00
|
|
|
|
2023-06-01 17:46:16 +00:00
|
|
|
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> Imported -> CommandStart
|
|
|
|
commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig imported =
|
2020-09-14 20:49:33 +00:00
|
|
|
starting "update" ai si $ do
|
2023-06-01 17:46:16 +00:00
|
|
|
importcommit <- buildImportCommit remote importtreeconfig importcommitconfig imported
|
2019-02-26 18:22:08 +00:00
|
|
|
next $ updateremotetrackingbranch importcommit
|
|
|
|
where
|
2023-04-08 19:48:32 +00:00
|
|
|
ai = ActionItemOther (Just $ UnquotedString $ fromRef $ fromRemoteTrackingBranch tb)
|
2020-09-14 20:49:33 +00:00
|
|
|
si = SeekInput []
|
2019-02-26 18:22:08 +00:00
|
|
|
-- Update the tracking branch. Done even when there
|
|
|
|
-- is nothing new to import, to make sure it exists.
|
|
|
|
updateremotetrackingbranch importcommit =
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
case importcommit <|> trackingcommit of
|
2019-02-26 18:22:08 +00:00
|
|
|
Just c -> do
|
2019-03-01 18:44:22 +00:00
|
|
|
setRemoteTrackingBranch tb c
|
2019-02-26 18:22:08 +00:00
|
|
|
return True
|
|
|
|
Nothing -> do
|
filter out control characters in warning messages
Converted warning and similar to use StringContainingQuotedPath. Most
warnings are static strings, some do refer to filepaths that need to be
quoted, and others don't need quoting.
Note that, since quote filters out control characters of even
UnquotedString, this makes all warnings safe, even when an attacker
sneaks in a control character in some other way.
When json is being output, no quoting is done, since json gets its own
quoting.
This does, as a side effect, make warning messages in json output not
be indented. The indentation is only needed to offset warning messages
underneath the display of the file they apply to, so that's ok.
Sponsored-by: Brett Eisenberg on Patreon
2023-04-10 18:47:32 +00:00
|
|
|
warning $ UnquotedString $ "Nothing to import and " ++ fromRef branch ++ " does not exist."
|
2019-02-26 18:22:08 +00:00
|
|
|
return False
|