git-annex/CmdLine/Batch.hs

229 lines
7.3 KiB
Haskell
Raw Normal View History

{- git-annex batch commands
-
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module CmdLine.Batch where
import Annex.Common
import qualified Annex
import Types.Command
import CmdLine.Action
import CmdLine.GitAnnex.Options
import CmdLine.Seek
import Options.Applicative
import Limit
import Types.FileMatcher
import Annex.BranchState
import Annex.WorkTree
import Annex.Content
import Annex.Concurrent
import Types.Concurrency
data BatchMode = Batch BatchFormat | NoBatch
data BatchFormat = BatchFormat BatchSeparator BatchKeys
data BatchSeparator = BatchLine | BatchNull
newtype BatchKeys = BatchKeys Bool
parseBatchOption :: Bool -> Parser BatchMode
parseBatchOption supportbatchkeysoption = go
<$> switch
( long "batch"
<> help batchhelp
)
<*> batchkeysswitch
<*> flag BatchLine BatchNull
( short 'z'
<> help "null delimited batch input"
)
where
go True False batchseparator =
Batch (BatchFormat batchseparator (BatchKeys False))
go _ True batchseparator =
Batch (BatchFormat batchseparator (BatchKeys True))
go _ _ _ = NoBatch
batchhelp = "enable batch mode" ++
if supportbatchkeysoption
then ", with files input"
else ""
batchkeyshelp = "enable batch mode, with keys input"
batchkeysswitch
| supportbatchkeysoption = switch
( long "batch-keys"
<> help batchkeyshelp
)
| otherwise = pure False
2015-12-21 16:57:13 +00:00
-- A batchable command can run in batch mode, or not.
-- In batch mode, one line at a time is read, parsed, and a reply output to
-- stdout. In non batch mode, the command's parameters are parsed and
-- a reply output for each.
--
-- Note that the actions are not run concurrently.
batchable :: (opts -> SeekInput -> String -> Annex Bool) -> Parser opts -> CmdParamsDesc -> CommandParser
batchable handler parser paramdesc = batchseeker <$> batchparser
where
batchparser = (,,)
<$> parser
<*> parseBatchOption False
<*> cmdParams paramdesc
batchseeker (opts, NoBatch, params) =
mapM_ (\p -> go NoBatch opts (SeekInput [p], p)) params
batchseeker (opts, batchmode@(Batch fmt), params) =
batchOnly Nothing params $
batchInput fmt (pure . Right) (go batchmode opts)
go batchmode opts (si, p) =
unlessM (handler opts si p) $
batchBadInput batchmode
-- bad input is indicated by an empty line in batch mode. In non batch
-- mode, exit on bad input.
batchBadInput :: BatchMode -> Annex ()
batchBadInput NoBatch = liftIO exitFailure
batchBadInput _ = liftIO $ putStrLn ""
2015-12-21 16:57:13 +00:00
-- Reads lines of batch mode input, runs a parser, and passes the result
-- to the action.
--
-- Note that if the batch input includes a worktree filename, it should
-- be converted to relative. Normally, filename parameters are passed
-- through git ls-files, which makes them relative, but batch mode does
-- not use that, and absolute worktree files are likely to cause breakage.
batchInput :: BatchFormat -> (String -> Annex (Either String v)) -> ((SeekInput, v) -> Annex ()) -> Annex ()
batchInput fmt parser a = go =<< batchLines fmt
2015-12-22 16:20:39 +00:00
where
go [] = return ()
go (l:rest) = do
either parseerr (\v -> a (SeekInput [l], v)) =<< parser l
go rest
parseerr s = giveup $ "Batch input parse failure: " ++ s
2016-01-19 21:46:46 +00:00
batchLines :: BatchFormat -> Annex [String]
batchLines (BatchFormat sep _) = do
checkBatchConcurrency
enableInteractiveBranchAccess
liftIO $ splitter <$> getContents
where
splitter = case sep of
BatchLine -> lines
BatchNull -> elimemptyend . splitc '\0'
-- When there is a trailing null on the input, eliminate the empty
-- string that splitc generates. Other empty strings elsewhere in
-- the list are preserved. This is the same effect as how `lines`
-- handles a trailing newline.
elimemptyend [] = []
elimemptyend (x:[])
| null x = []
| otherwise = [x]
elimemptyend (x:rest) = x : elimemptyend rest
-- When concurrency is enabled at the command line, it is used in batch
-- mode. But, if it's only set in git config, don't use it, because the
-- program using batch mode may not expect interleaved output.
checkBatchConcurrency :: Annex ()
checkBatchConcurrency = Annex.getState Annex.concurrency >>= \case
ConcurrencyCmdLine _ -> noop
ConcurrencyGitConfig _ ->
setConcurrency (ConcurrencyGitConfig (Concurrent 1))
batchCommandAction :: CommandStart -> Annex ()
batchCommandAction = commandAction . batchCommandStart
2016-01-19 21:46:46 +00:00
-- The batch mode user expects to read a line of output, and it's up to the
-- CommandStart to generate that output as it succeeds or fails to do its
-- job. However, if it stops without doing anything, it won't generate
-- any output. This modifies it so in that case, an empty line is printed.
batchCommandStart :: CommandStart -> CommandStart
batchCommandStart a = a >>= \case
Just v -> return (Just v)
Nothing -> do
batchBadInput (Batch (BatchFormat BatchLine (BatchKeys False)))
return Nothing
2016-01-20 16:46:00 +00:00
-- Reads lines of batch input and passes the filepaths to a CommandStart
-- to handle them.
--
-- File matching options are checked, and non-matching files skipped.
batchFiles :: BatchFormat -> ((SeekInput, RawFilePath) -> CommandStart) -> Annex ()
batchFiles fmt a = batchFilesKeys fmt $ \(si, v) -> case v of
Right f -> a (si, f)
Left _k -> return Nothing
batchFilesKeys :: BatchFormat -> ((SeekInput, Either Key RawFilePath) -> CommandStart) -> Annex ()
batchFilesKeys fmt a = do
matcher <- getMatcher
go $ \si v -> case v of
Right f ->
let f' = toRawFilePath f
in ifM (matcher $ MatchingFile $ FileInfo f' f' Nothing)
( a (si, Right f')
, return Nothing
)
Left k -> a (si, Left k)
where
go a' = batchInput fmt parser (batchCommandAction . uncurry a')
parser = case fmt of
-- Absolute filepaths are converted to relative,
-- because in non-batch mode, that is done when
-- CmdLine.Seek uses git ls-files.
BatchFormat _ (BatchKeys False) ->
Right . Right . fromRawFilePath
<$$> liftIO . relPathCwdToFile . toRawFilePath
BatchFormat _ (BatchKeys True) -> \i ->
pure $ case deserializeKey i of
Just k -> Right (Left k)
Nothing -> Left "not a valid key"
batchAnnexedFiles :: BatchFormat -> AnnexedFileSeeker -> Annex ()
batchAnnexedFiles fmt seeker = batchAnnexed fmt seeker (const (return Nothing))
-- Reads lines of batch input and passes filepaths to the AnnexedFileSeeker
-- to handle them. Or, with --batch-keys, passes keys to the keyaction.
--
-- Matching options are checked, and non-matching items skipped.
batchAnnexed :: BatchFormat -> AnnexedFileSeeker -> ((SeekInput, Key, ActionItem) -> CommandStart) -> Annex ()
batchAnnexed fmt seeker keyaction = do
matcher <- getMatcher
batchFilesKeys fmt $ \(si, v) ->
case v of
use lookupKeyStaged in --batch code paths Make --batch mode handle unstaged annexed files consistently whether the file is unlocked or not. Before this, a unstaged locked file would have the symlink on disk examined and operated on in --batch mode, while an unstaged unlocked file would be skipped. Note that, when not in batch mode, unstaged files are skipped over too. That is actually somewhat new behavior; as late as 7.20191114 a command like `git-annex whereis .` would operate on unstaged locked files and skip over unstaged unlocked files. That changed during optimisation of CmdLine.Seek with apparently little fanfare or notice. Turns out that rmurl still behaved that way when given an unstaged file on the command line. It was changed to use lookupKeyStaged to handle its --batch mode. That also affected its non-batch mode, but since that's just catching up to the change earlier made to most other commands, I have not mentioed that in the changelog. It may be that other uses of lookupKey should also change to lookupKeyStaged. But it may also be that would slow down some things, or lead to unwanted behavior changes, so I've kept the changes minimal for now. An example of a place where the use of lookupKey is better than lookupKeyStaged is in Command.AddUrl, where it looks to see if the file already exists, and adds the url to the file when so. It does not matter there whether the file is staged or not (when it's locked). The use of lookupKey in Command.Unused likewise seems good (and faster). Sponsored-by: Nicholas Golder-Manning on Patreon
2022-10-26 18:23:06 +00:00
Right f -> lookupKeyStaged f >>= \case
Nothing -> return Nothing
Just k -> checkpresent k $
startAction seeker si f k
Left k -> ifM (matcher (MatchingInfo (mkinfo k)))
( checkpresent k $
keyaction (si, k, mkActionItem k)
, return Nothing)
where
checkpresent k cont = case checkContentPresent seeker of
Just v -> do
present <- inAnnex k
if present == v
then cont
else return Nothing
Nothing -> cont
mkinfo k = ProvidedInfo
{ providedFilePath = Nothing
, providedKey = Just k
, providedFileSize = Nothing
, providedMimeType = Nothing
, providedMimeEncoding = Nothing
, providedLinkType = Nothing
}
batchOnly :: Maybe KeyOptions -> CmdParams -> Annex () -> Annex ()
batchOnly Nothing [] a = a
batchOnly _ _ _ = giveup "Cannot combine batch option with file or key options"