2012-05-31 23:47:18 +00:00
|
|
|
{- git-annex command
|
|
|
|
-
|
2019-02-26 16:06:19 +00:00
|
|
|
- Copyright 2012-2019 Joey Hess <id@joeyh.name>
|
2012-05-31 23:47:18 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2012-05-31 23:47:18 +00:00
|
|
|
-}
|
|
|
|
|
2019-02-26 16:06:19 +00:00
|
|
|
{-# LANGUAGE ApplicativeDo #-}
|
|
|
|
|
2012-05-31 23:47:18 +00:00
|
|
|
module Command.Import where
|
|
|
|
|
|
|
|
import Command
|
2015-05-11 16:57:47 +00:00
|
|
|
import qualified Git
|
2012-05-31 23:47:18 +00:00
|
|
|
import qualified Annex
|
|
|
|
import qualified Command.Add
|
2017-02-09 19:40:44 +00:00
|
|
|
import qualified Command.Reinject
|
2019-02-26 16:06:19 +00:00
|
|
|
import qualified Types.Remote as Remote
|
2019-02-26 17:11:25 +00:00
|
|
|
import qualified Git.Ref
|
2013-08-11 18:31:54 +00:00
|
|
|
import Utility.CopyFile
|
2013-08-20 15:00:52 +00:00
|
|
|
import Backend
|
|
|
|
import Types.KeySource
|
2015-04-29 17:46:12 +00:00
|
|
|
import Annex.CheckIgnore
|
2015-04-30 18:03:24 +00:00
|
|
|
import Annex.NumCopies
|
2015-12-02 18:48:42 +00:00
|
|
|
import Annex.FileMatcher
|
2017-02-09 19:32:22 +00:00
|
|
|
import Annex.Ingest
|
|
|
|
import Annex.InodeSentinal
|
2019-02-26 16:06:19 +00:00
|
|
|
import Annex.Import
|
2020-03-06 15:57:15 +00:00
|
|
|
import Annex.Perms
|
2019-03-01 18:44:22 +00:00
|
|
|
import Annex.RemoteTrackingBranch
|
2017-02-09 19:32:22 +00:00
|
|
|
import Utility.InodeCache
|
2017-02-07 21:35:51 +00:00
|
|
|
import Logs.Location
|
2019-02-26 16:06:19 +00:00
|
|
|
import Git.FilePath
|
|
|
|
import Git.Types
|
2019-02-26 17:11:25 +00:00
|
|
|
import Types.Import
|
2019-06-25 15:37:52 +00:00
|
|
|
import Utility.Metered
|
2012-05-31 23:47:18 +00:00
|
|
|
|
2019-04-10 21:02:56 +00:00
|
|
|
import Control.Concurrent.STM
|
|
|
|
|
2015-07-08 16:33:27 +00:00
|
|
|
cmd :: Command
|
2018-02-19 18:28:17 +00:00
|
|
|
cmd = notBareRepo $
|
2020-05-26 15:27:47 +00:00
|
|
|
withGlobalOptions [jobsOption, jsonOptions, jsonProgressOption, fileMatchingOptions] $
|
2018-02-19 18:28:17 +00:00
|
|
|
command "import" SectionCommon
|
2019-11-19 17:26:27 +00:00
|
|
|
"add a tree of files to the repository"
|
2019-03-06 17:10:29 +00:00
|
|
|
(paramPaths ++ "|BRANCH[:SUBDIR]")
|
|
|
|
(seek <$$> optParser)
|
2013-08-11 18:31:54 +00:00
|
|
|
|
2019-02-26 16:06:19 +00:00
|
|
|
data ImportOptions
|
|
|
|
= LocalImportOptions
|
|
|
|
{ importFiles :: CmdParams
|
|
|
|
, duplicateMode :: DuplicateMode
|
|
|
|
}
|
|
|
|
| RemoteImportOptions
|
|
|
|
{ importFromRemote :: DeferredParse Remote
|
|
|
|
, importToBranch :: Branch
|
|
|
|
, importToSubDir :: Maybe FilePath
|
|
|
|
}
|
2013-12-04 17:13:30 +00:00
|
|
|
|
2015-07-13 15:15:21 +00:00
|
|
|
optParser :: CmdParamsDesc -> Parser ImportOptions
|
2019-03-06 17:10:29 +00:00
|
|
|
optParser desc = do
|
|
|
|
ps <- cmdParams desc
|
|
|
|
mfromremote <- optional $ parseRemoteOption <$> parseFromOption
|
|
|
|
dupmode <- fromMaybe Default <$> optional duplicateModeParser
|
|
|
|
return $ case mfromremote of
|
|
|
|
Nothing -> LocalImportOptions ps dupmode
|
|
|
|
Just r -> case ps of
|
|
|
|
[bs] ->
|
|
|
|
let (branch, subdir) = separate (== ':') bs
|
|
|
|
in RemoteImportOptions r
|
2020-04-07 21:41:09 +00:00
|
|
|
(Ref (encodeBS' branch))
|
2019-03-06 17:10:29 +00:00
|
|
|
(if null subdir then Nothing else Just subdir)
|
|
|
|
_ -> giveup "expected BRANCH[:SUBDIR]"
|
2019-02-26 16:06:19 +00:00
|
|
|
|
|
|
|
data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
|
|
|
|
deriving (Eq)
|
2013-08-11 18:31:54 +00:00
|
|
|
|
2015-07-13 15:15:21 +00:00
|
|
|
duplicateModeParser :: Parser DuplicateMode
|
|
|
|
duplicateModeParser =
|
|
|
|
flag' Duplicate
|
|
|
|
( long "duplicate"
|
|
|
|
<> help "do not delete source files"
|
|
|
|
)
|
|
|
|
<|> flag' DeDuplicate
|
|
|
|
( long "deduplicate"
|
|
|
|
<> help "delete source files whose content was imported before"
|
|
|
|
)
|
|
|
|
<|> flag' CleanDuplicates
|
|
|
|
( long "clean-duplicates"
|
|
|
|
<> help "delete duplicate source files (import nothing)"
|
|
|
|
)
|
|
|
|
<|> flag' SkipDuplicates
|
|
|
|
( long "skip-duplicates"
|
2017-02-09 19:40:44 +00:00
|
|
|
<> help "import only new files (do not delete source files)"
|
|
|
|
)
|
|
|
|
<|> flag' ReinjectDuplicates
|
|
|
|
( long "reinject-duplicates"
|
|
|
|
<> help "import new files, and reinject the content of files that were imported before"
|
2015-07-13 15:15:21 +00:00
|
|
|
)
|
2013-08-11 18:31:54 +00:00
|
|
|
|
2015-07-13 15:15:21 +00:00
|
|
|
seek :: ImportOptions -> CommandSeek
|
2019-06-19 16:35:08 +00:00
|
|
|
seek o@(LocalImportOptions {}) = startConcurrency commandStages $ do
|
2019-12-09 17:49:05 +00:00
|
|
|
repopath <- liftIO . absPath . fromRawFilePath =<< fromRepo Git.repoPath
|
2015-07-13 15:15:21 +00:00
|
|
|
inrepops <- liftIO $ filter (dirContains repopath) <$> mapM absPath (importFiles o)
|
2015-05-11 16:57:47 +00:00
|
|
|
unless (null inrepops) $ do
|
2016-11-16 01:29:54 +00:00
|
|
|
giveup $ "cannot import files from inside the working tree (use git annex add instead): " ++ unwords inrepops
|
2015-12-02 18:48:42 +00:00
|
|
|
largematcher <- largeFilesMatcher
|
2019-12-20 19:01:34 +00:00
|
|
|
addunlockedmatcher <- addUnlockedMatcher
|
|
|
|
(commandAction . startLocal addunlockedmatcher largematcher (duplicateMode o))
|
2018-10-01 18:12:06 +00:00
|
|
|
`withPathContents` importFiles o
|
2019-06-19 16:35:08 +00:00
|
|
|
seek o@(RemoteImportOptions {}) = startConcurrency commandStages $ do
|
2019-02-26 16:06:19 +00:00
|
|
|
r <- getParsed (importFromRemote o)
|
2019-03-09 17:57:49 +00:00
|
|
|
unlessM (Remote.isImportSupported r) $
|
|
|
|
giveup "That remote does not support imports."
|
2019-02-26 16:06:19 +00:00
|
|
|
subdir <- maybe
|
|
|
|
(pure Nothing)
|
2019-12-09 17:49:05 +00:00
|
|
|
(Just <$$> inRepo . toTopFilePath . toRawFilePath)
|
2019-02-26 16:06:19 +00:00
|
|
|
(importToSubDir o)
|
2019-02-26 18:22:08 +00:00
|
|
|
seekRemote r (importToBranch o) subdir
|
2012-05-31 23:47:18 +00:00
|
|
|
|
2019-12-20 19:01:34 +00:00
|
|
|
startLocal :: AddUnlockedMatcher -> GetFileMatcher -> DuplicateMode -> (FilePath, FilePath) -> CommandStart
|
|
|
|
startLocal addunlockedmatcher largematcher mode (srcfile, destfile) =
|
2012-05-31 23:47:18 +00:00
|
|
|
ifM (liftIO $ isRegularFile <$> getSymbolicLinkStatus srcfile)
|
2020-02-21 13:34:59 +00:00
|
|
|
( starting "import" (ActionItemWorkTreeFile destfile')
|
make CommandStart return a StartMessage
The goal is to be able to run CommandStart in the main thread when -J is
used, rather than unncessarily passing it off to a worker thread, which
incurs overhead that is signficant when the CommandStart is going to
quickly decide to stop.
To do that, the message it displays needs to be displayed in the worker
thread, after the CommandStart has run.
Also, the change will mean that CommandStart will no longer necessarily
run with the same Annex state as CommandPerform. While its docs already
said it should avoid modifying Annex state, I audited all the
CommandStart code as part of the conversion. (Note that CommandSeek
already sometimes runs with a different Annex state, and that has not been
a source of any problems, so I am not too worried that this change will
lead to breakage going forward.)
The only modification of Annex state I found was it calling
allowMessages in some Commands that default to noMessages. Dealt with
that by adding a startCustomOutput and a startingUsualMessages.
This lets a command start with noMessages and then select the output it
wants for each CommandStart.
One bit of breakage: onlyActionOn has been removed from commands that used it.
The plan is that, since a StartMessage contains an ActionItem,
when a Key can be extracted from that, the parallel job runner can
run onlyActionOn' automatically. Then commands won't need to worry about
this detail. Future work.
Otherwise, this was a fairly straightforward process of making each
CommandStart compile again. Hopefully other behavior changes were mostly
avoided.
In a few cases, a command had a CommandStart that called a CommandPerform
that then called showStart multiple times. I have collapsed those
down to a single start action. The main command to perhaps suffer from it
is Command.Direct, which used to show a start for each file, and no
longer does.
Another minor behavior change is that some commands used showStart
before, but had an associated file and a Key available, so were changed
to ShowStart with an ActionItemAssociatedFile. That will not change the
normal output or behavior, but --json output will now include the key.
This should not break it for anyone using a real json parser.
2019-06-06 19:42:30 +00:00
|
|
|
pickaction
|
2012-05-31 23:47:18 +00:00
|
|
|
, stop
|
|
|
|
)
|
2013-08-20 15:00:52 +00:00
|
|
|
where
|
2020-02-21 13:34:59 +00:00
|
|
|
destfile' = toRawFilePath destfile
|
|
|
|
|
2015-03-31 19:36:02 +00:00
|
|
|
deletedup k = do
|
2019-01-14 17:03:35 +00:00
|
|
|
showNote $ "duplicate of " ++ serializeKey k
|
2015-10-09 15:09:46 +00:00
|
|
|
verifyExisting k destfile
|
2015-04-30 18:03:24 +00:00
|
|
|
( do
|
|
|
|
liftIO $ removeFile srcfile
|
|
|
|
next $ return True
|
|
|
|
, do
|
2015-04-30 18:10:28 +00:00
|
|
|
warning "Could not verify that the content is still present in the annex; not removing from the import location."
|
2015-04-30 18:03:24 +00:00
|
|
|
stop
|
|
|
|
)
|
2017-02-09 19:40:44 +00:00
|
|
|
reinject k = do
|
|
|
|
showNote "reinjecting"
|
|
|
|
Command.Reinject.perform srcfile k
|
2017-02-09 19:32:22 +00:00
|
|
|
importfile ld k = checkdestdir $ do
|
2015-04-29 17:46:12 +00:00
|
|
|
ignored <- not <$> Annex.getState Annex.force <&&> checkIgnored destfile
|
|
|
|
if ignored
|
2015-04-29 17:56:41 +00:00
|
|
|
then do
|
|
|
|
warning $ "not importing " ++ destfile ++ " which is .gitignored (use --force to override)"
|
|
|
|
stop
|
2015-04-29 17:46:12 +00:00
|
|
|
else do
|
2015-04-29 17:56:41 +00:00
|
|
|
existing <- liftIO (catchMaybeIO $ getSymbolicLinkStatus destfile)
|
|
|
|
case existing of
|
2017-02-09 19:32:22 +00:00
|
|
|
Nothing -> importfilechecked ld k
|
2015-11-05 22:45:52 +00:00
|
|
|
Just s
|
2015-04-29 17:56:41 +00:00
|
|
|
| isDirectory s -> notoverwriting "(is a directory)"
|
2019-03-18 20:40:15 +00:00
|
|
|
| isSymbolicLink s -> ifM (Annex.getState Annex.force)
|
|
|
|
( do
|
|
|
|
liftIO $ nukeFile destfile
|
|
|
|
importfilechecked ld k
|
|
|
|
, notoverwriting "(is a symlink)"
|
|
|
|
)
|
2015-04-29 17:56:41 +00:00
|
|
|
| otherwise -> ifM (Annex.getState Annex.force)
|
|
|
|
( do
|
|
|
|
liftIO $ nukeFile destfile
|
2017-02-09 19:32:22 +00:00
|
|
|
importfilechecked ld k
|
2015-04-29 17:56:41 +00:00
|
|
|
, notoverwriting "(use --force to override, or a duplication option such as --deduplicate to clean up)"
|
|
|
|
)
|
2015-11-05 22:45:52 +00:00
|
|
|
checkdestdir cont = do
|
|
|
|
let destdir = parentDir destfile
|
|
|
|
existing <- liftIO (catchMaybeIO $ getSymbolicLinkStatus destdir)
|
|
|
|
case existing of
|
|
|
|
Nothing -> cont
|
|
|
|
Just s
|
|
|
|
| isDirectory s -> cont
|
|
|
|
| otherwise -> do
|
|
|
|
warning $ "not importing " ++ destfile ++ " because " ++ destdir ++ " is not a directory"
|
|
|
|
stop
|
|
|
|
|
2017-02-09 19:32:22 +00:00
|
|
|
importfilechecked ld k = do
|
|
|
|
-- Move or copy the src file to the dest file.
|
|
|
|
-- The dest file is what will be ingested.
|
2020-03-06 15:57:15 +00:00
|
|
|
createWorkTreeDirectory (parentDir destfile)
|
2015-04-29 17:56:41 +00:00
|
|
|
liftIO $ if mode == Duplicate || mode == SkipDuplicates
|
|
|
|
then void $ copyFileExternal CopyAllMetaData srcfile destfile
|
|
|
|
else moveFile srcfile destfile
|
2017-02-09 19:32:22 +00:00
|
|
|
-- Get the inode cache of the dest file. It should be
|
2020-03-06 15:57:15 +00:00
|
|
|
-- weakly the same as the originally locked down file's
|
2017-02-09 19:32:22 +00:00
|
|
|
-- inode cache. (Since the file may have been copied,
|
|
|
|
-- its inodes may not be the same.)
|
2020-02-21 13:34:59 +00:00
|
|
|
newcache <- withTSDelta $ liftIO . genInodeCache destfile'
|
2017-02-09 19:32:22 +00:00
|
|
|
let unchanged = case (newcache, inodeCache (keySource ld)) of
|
|
|
|
(_, Nothing) -> True
|
|
|
|
(Just newc, Just c) | compareWeak c newc -> True
|
|
|
|
_ -> False
|
|
|
|
unless unchanged $
|
|
|
|
giveup "changed while it was being added"
|
|
|
|
-- The LockedDown needs to be adjusted, since the destfile
|
|
|
|
-- is what will be ingested.
|
|
|
|
let ld' = ld
|
|
|
|
{ keySource = KeySource
|
2020-02-21 13:34:59 +00:00
|
|
|
{ keyFilename = destfile'
|
|
|
|
, contentLocation = destfile'
|
2017-02-09 19:32:22 +00:00
|
|
|
, inodeCache = newcache
|
|
|
|
}
|
|
|
|
}
|
2015-12-02 18:48:42 +00:00
|
|
|
ifM (checkFileMatcher largematcher destfile)
|
2019-06-25 15:37:52 +00:00
|
|
|
( ingestAdd' nullMeterUpdate (Just ld') (Just k)
|
2017-02-09 19:32:22 +00:00
|
|
|
>>= maybe
|
|
|
|
stop
|
|
|
|
(\addedk -> next $ Command.Add.cleanup addedk True)
|
2020-02-21 13:34:59 +00:00
|
|
|
, next $ Command.Add.addSmall destfile'
|
2015-12-02 18:48:42 +00:00
|
|
|
)
|
2015-04-29 17:56:41 +00:00
|
|
|
notoverwriting why = do
|
|
|
|
warning $ "not overwriting existing " ++ destfile ++ " " ++ why
|
|
|
|
stop
|
2017-02-09 19:32:22 +00:00
|
|
|
lockdown a = do
|
2019-12-20 19:01:34 +00:00
|
|
|
let mi = MatchingFile $ FileInfo
|
|
|
|
{ currFile = toRawFilePath srcfile
|
|
|
|
, matchFile = toRawFilePath destfile
|
|
|
|
}
|
|
|
|
lockingfile <- not <$> addUnlocked addunlockedmatcher mi
|
2017-02-09 19:32:22 +00:00
|
|
|
-- Minimal lock down with no hard linking so nothing
|
|
|
|
-- has to be done to clean up from it.
|
|
|
|
let cfg = LockDownConfig
|
|
|
|
{ lockingFile = lockingfile
|
2019-05-07 17:04:39 +00:00
|
|
|
, hardlinkFileTmpDir = Nothing
|
2017-02-09 19:32:22 +00:00
|
|
|
}
|
|
|
|
v <- lockDown cfg srcfile
|
2015-03-31 19:36:02 +00:00
|
|
|
case v of
|
2017-02-09 19:32:22 +00:00
|
|
|
Just ld -> do
|
|
|
|
backend <- chooseBackend destfile
|
2020-05-15 16:51:09 +00:00
|
|
|
k <- fst <$> genKey (keySource ld) nullMeterUpdate backend
|
|
|
|
a (ld, k)
|
2017-02-09 19:32:22 +00:00
|
|
|
Nothing -> stop
|
|
|
|
checkdup k dupa notdupa = ifM (isKnownKey k)
|
|
|
|
( dupa
|
|
|
|
, notdupa
|
|
|
|
)
|
|
|
|
pickaction = lockdown $ \(ld, k) -> case mode of
|
|
|
|
DeDuplicate -> checkdup k (deletedup k) (importfile ld k)
|
|
|
|
CleanDuplicates -> checkdup k
|
|
|
|
(deletedup k)
|
|
|
|
(skipbecause "not duplicate")
|
|
|
|
SkipDuplicates -> checkdup k
|
|
|
|
(skipbecause "duplicate")
|
|
|
|
(importfile ld k)
|
2017-02-09 19:40:44 +00:00
|
|
|
ReinjectDuplicates -> checkdup k
|
|
|
|
(reinject k)
|
|
|
|
(importfile ld k)
|
2017-02-09 19:32:22 +00:00
|
|
|
_ -> importfile ld k
|
|
|
|
skipbecause s = showNote (s ++ "; skipping") >> next (return True)
|
2015-04-30 18:03:24 +00:00
|
|
|
|
2015-10-09 15:09:46 +00:00
|
|
|
verifyExisting :: Key -> FilePath -> (CommandPerform, CommandPerform) -> CommandPerform
|
|
|
|
verifyExisting key destfile (yes, no) = do
|
2015-04-30 18:03:24 +00:00
|
|
|
-- Look up the numcopies setting for the file that it would be
|
|
|
|
-- imported to, if it were imported.
|
|
|
|
need <- getFileNumCopies destfile
|
|
|
|
|
2015-10-09 18:57:32 +00:00
|
|
|
(tocheck, preverified) <- verifiableCopies key []
|
2015-10-09 19:48:02 +00:00
|
|
|
verifyEnoughCopiesToDrop [] key Nothing need [] preverified tocheck
|
2015-10-09 15:09:46 +00:00
|
|
|
(const yes) no
|
2019-02-26 16:06:19 +00:00
|
|
|
|
2019-02-26 18:22:08 +00:00
|
|
|
seekRemote :: Remote -> Branch -> Maybe TopFilePath -> CommandSeek
|
2019-03-09 17:34:57 +00:00
|
|
|
seekRemote remote branch msubdir = do
|
2019-02-26 17:11:25 +00:00
|
|
|
importtreeconfig <- case msubdir of
|
|
|
|
Nothing -> return ImportTree
|
2019-03-01 18:32:45 +00:00
|
|
|
Just subdir ->
|
|
|
|
let mk tree = pure $ ImportSubTree subdir tree
|
|
|
|
in fromtrackingbranch Git.Ref.tree >>= \case
|
|
|
|
Just tree -> mk tree
|
|
|
|
Nothing -> inRepo (Git.Ref.tree branch) >>= \case
|
|
|
|
Just tree -> mk tree
|
|
|
|
Nothing -> giveup $ "Unable to find base tree for branch " ++ fromRef branch
|
|
|
|
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
trackingcommit <- fromtrackingbranch Git.Ref.sha
|
2019-11-11 22:20:35 +00:00
|
|
|
cmode <- annexCommitMode <$> Annex.getGitConfig
|
2019-11-11 20:15:05 +00:00
|
|
|
let importcommitconfig = ImportCommitConfig trackingcommit cmode importmessage
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
let commitimport = commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig
|
2019-02-26 18:22:08 +00:00
|
|
|
|
2019-04-10 21:02:56 +00:00
|
|
|
importabletvar <- liftIO $ newTVarIO Nothing
|
|
|
|
void $ includeCommandAction (listContents remote importabletvar)
|
|
|
|
liftIO (atomically (readTVar importabletvar)) >>= \case
|
|
|
|
Nothing -> return ()
|
|
|
|
Just importable -> downloadImport remote importtreeconfig importable >>= \case
|
|
|
|
Nothing -> warning $ concat
|
|
|
|
[ "Failed to import some files from "
|
|
|
|
, Remote.name remote
|
|
|
|
, ". Re-run command to resume import."
|
|
|
|
]
|
|
|
|
Just imported -> void $
|
finish CommandStart transition
The hoped for optimisation of CommandStart with -J did not materialize.
In fact, not runnign CommandStart in parallel is slower than -J3.
So, CommandStart are still run in parallel.
(The actual bad performance I've been seeing with -J in my big repo
has to do with building the remoteList.)
But, this is still progress toward making -J faster, because it gets rid
of the onlyActionOn roadblock in the way of making CommandCleanup jobs
run separate from CommandPerform jobs.
Added OnlyActionOn constructor for ActionItem which fixes the
onlyActionOn breakage in the last commit.
Made CustomOutput include an ActionItem, so even things using it can
specify OnlyActionOn.
In Command.Move and Command.Sync, there were CommandStarts that used
includeCommandAction, so output messages, which is no longer allowed.
Fixed by using startingCustomOutput, but that's still not quite right,
since it prevents message display for the includeCommandAction run
inside it too.
2019-06-12 13:23:26 +00:00
|
|
|
includeCommandAction $
|
|
|
|
commitimport imported
|
2019-02-26 17:11:25 +00:00
|
|
|
where
|
|
|
|
importmessage = "import from " ++ Remote.name remote
|
2019-03-01 18:32:45 +00:00
|
|
|
|
2019-02-26 17:11:25 +00:00
|
|
|
tb = mkRemoteTrackingBranch remote branch
|
2019-03-01 18:32:45 +00:00
|
|
|
|
|
|
|
fromtrackingbranch a = inRepo $ a (fromRemoteTrackingBranch tb)
|
2019-02-26 18:22:08 +00:00
|
|
|
|
2019-04-10 21:02:56 +00:00
|
|
|
listContents :: Remote -> TVar (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize))) -> CommandStart
|
make CommandStart return a StartMessage
The goal is to be able to run CommandStart in the main thread when -J is
used, rather than unncessarily passing it off to a worker thread, which
incurs overhead that is signficant when the CommandStart is going to
quickly decide to stop.
To do that, the message it displays needs to be displayed in the worker
thread, after the CommandStart has run.
Also, the change will mean that CommandStart will no longer necessarily
run with the same Annex state as CommandPerform. While its docs already
said it should avoid modifying Annex state, I audited all the
CommandStart code as part of the conversion. (Note that CommandSeek
already sometimes runs with a different Annex state, and that has not been
a source of any problems, so I am not too worried that this change will
lead to breakage going forward.)
The only modification of Annex state I found was it calling
allowMessages in some Commands that default to noMessages. Dealt with
that by adding a startCustomOutput and a startingUsualMessages.
This lets a command start with noMessages and then select the output it
wants for each CommandStart.
One bit of breakage: onlyActionOn has been removed from commands that used it.
The plan is that, since a StartMessage contains an ActionItem,
when a Key can be extracted from that, the parallel job runner can
run onlyActionOn' automatically. Then commands won't need to worry about
this detail. Future work.
Otherwise, this was a fairly straightforward process of making each
CommandStart compile again. Hopefully other behavior changes were mostly
avoided.
In a few cases, a command had a CommandStart that called a CommandPerform
that then called showStart multiple times. I have collapsed those
down to a single start action. The main command to perhaps suffer from it
is Command.Direct, which used to show a start for each file, and no
longer does.
Another minor behavior change is that some commands used showStart
before, but had an associated file and a Key available, so were changed
to ShowStart with an ActionItemAssociatedFile. That will not change the
normal output or behavior, but --json output will now include the key.
This should not break it for anyone using a real json parser.
2019-06-06 19:42:30 +00:00
|
|
|
listContents remote tvar = starting "list" (ActionItemOther (Just (Remote.name remote))) $
|
|
|
|
listImportableContents remote >>= \case
|
2019-04-10 21:02:56 +00:00
|
|
|
Nothing -> giveup $ "Unable to list contents of " ++ Remote.name remote
|
2019-05-21 18:38:00 +00:00
|
|
|
Just importable -> do
|
|
|
|
importable' <- makeImportMatcher remote >>= \case
|
|
|
|
Right matcher -> filterImportableContents remote matcher importable
|
|
|
|
Left err -> giveup $ "Cannot import from " ++ Remote.name remote ++ " because of a problem with its configuration: " ++ err
|
|
|
|
next $ do
|
|
|
|
liftIO $ atomically $ writeTVar tvar (Just importable')
|
|
|
|
return True
|
2019-02-26 19:25:28 +00:00
|
|
|
|
2020-06-23 20:07:18 +00:00
|
|
|
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents (Either Sha Key) -> CommandStart
|
make CommandStart return a StartMessage
The goal is to be able to run CommandStart in the main thread when -J is
used, rather than unncessarily passing it off to a worker thread, which
incurs overhead that is signficant when the CommandStart is going to
quickly decide to stop.
To do that, the message it displays needs to be displayed in the worker
thread, after the CommandStart has run.
Also, the change will mean that CommandStart will no longer necessarily
run with the same Annex state as CommandPerform. While its docs already
said it should avoid modifying Annex state, I audited all the
CommandStart code as part of the conversion. (Note that CommandSeek
already sometimes runs with a different Annex state, and that has not been
a source of any problems, so I am not too worried that this change will
lead to breakage going forward.)
The only modification of Annex state I found was it calling
allowMessages in some Commands that default to noMessages. Dealt with
that by adding a startCustomOutput and a startingUsualMessages.
This lets a command start with noMessages and then select the output it
wants for each CommandStart.
One bit of breakage: onlyActionOn has been removed from commands that used it.
The plan is that, since a StartMessage contains an ActionItem,
when a Key can be extracted from that, the parallel job runner can
run onlyActionOn' automatically. Then commands won't need to worry about
this detail. Future work.
Otherwise, this was a fairly straightforward process of making each
CommandStart compile again. Hopefully other behavior changes were mostly
avoided.
In a few cases, a command had a CommandStart that called a CommandPerform
that then called showStart multiple times. I have collapsed those
down to a single start action. The main command to perhaps suffer from it
is Command.Direct, which used to show a start for each file, and no
longer does.
Another minor behavior change is that some commands used showStart
before, but had an associated file and a Key available, so were changed
to ShowStart with an ActionItemAssociatedFile. That will not change the
normal output or behavior, but --json output will now include the key.
This should not break it for anyone using a real json parser.
2019-06-06 19:42:30 +00:00
|
|
|
commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig importable =
|
|
|
|
starting "update" (ActionItemOther (Just $ fromRef $ fromRemoteTrackingBranch tb)) $ do
|
2019-02-26 18:22:08 +00:00
|
|
|
importcommit <- buildImportCommit remote importtreeconfig importcommitconfig importable
|
|
|
|
next $ updateremotetrackingbranch importcommit
|
|
|
|
|
|
|
|
where
|
|
|
|
-- Update the tracking branch. Done even when there
|
|
|
|
-- is nothing new to import, to make sure it exists.
|
|
|
|
updateremotetrackingbranch importcommit =
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
case importcommit <|> trackingcommit of
|
2019-02-26 18:22:08 +00:00
|
|
|
Just c -> do
|
2019-03-01 18:44:22 +00:00
|
|
|
setRemoteTrackingBranch tb c
|
2019-02-26 18:22:08 +00:00
|
|
|
return True
|
|
|
|
Nothing -> do
|
|
|
|
warning $ "Nothing to import and " ++ fromRef branch ++ " does not exist."
|
|
|
|
return False
|