Added -z option to git-annex commands that use --batch, useful for
supporting filenames containing newlines.

It only controls input to --batch, the output will still be line delimited
unless --json or etc is used to get some other output. While git often
makes -z affect both input and output, I don't like trying them together,
and making it affect output would have been a significant complication,
and also git-annex output is generally not intended to be machine parsed,
unless using --json or a format option.

Commands that take pairs like "file key" still separate them with a space
in --batch mode. All such commands take care to support filenames with
spaces when parsing that, so there was no need to change it, and it would
have needed significant changes to the batch machinery to separate tose
with a null.

To make fromkey and registerurl support -z, I had to give them a --batch
option. The implicit batch mode they enter when not provided with input
parameters does not support -z as that would have complicated option
parsing. Seemed better to move these toward using the same --batch as
everything else, though the implicit batch mode can still be used.

This commit was sponsored by Ole-Morten Duesund on Patreon.
This commit is contained in:
Joey Hess 2018-09-20 16:09:21 -04:00
parent 2aae6e84af
commit 1d1054faa6
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
34 changed files with 209 additions and 67 deletions

View file

@ -5,8 +5,12 @@ git-annex (6.20180914) UNRELEASED; urgency=medium
that does not end with a slash.
* --debug shows urls accessed by git-annex, like it used to do when
git-annex used wget and curl.
* Support filenames containing newlines, though less efficiently than
other filenames.
* Fix support for filenames containing newlines when querying git
cat-file, though less efficiently than other filenames.
This should make git-annex fully support filenames containing newlines
as the rest of git's interface is used in newline-safe ways.
* Added -z option to git-annex commands that use --batch, useful for
supporting filenames containing newlines.
[ Yaroslav Halchenko ]
* debian/control

View file

@ -15,13 +15,24 @@ import Options.Applicative
import Limit
import Types.FileMatcher
data BatchMode = Batch | NoBatch
data BatchMode = Batch BatchFormat | NoBatch
data BatchFormat = BatchLine | BatchNull
parseBatchOption :: Parser BatchMode
parseBatchOption = flag NoBatch Batch
( long "batch"
<> help "enable batch mode"
)
parseBatchOption = go
<$> switch
( long "batch"
<> help "enable batch mode"
)
<*> switch
( short 'z'
<> help "null delimited batch input"
)
where
go True False = Batch BatchLine
go True True = Batch BatchNull
go False _ = NoBatch
-- A batchable command can run in batch mode, or not.
-- In batch mode, one line at a time is read, parsed, and a reply output to
@ -35,8 +46,10 @@ batchable handler parser paramdesc = batchseeker <$> batchparser
<*> parseBatchOption
<*> cmdParams paramdesc
batchseeker (opts, NoBatch, params) = mapM_ (go NoBatch opts) params
batchseeker (opts, Batch, _) = batchInput Right (go Batch opts)
batchseeker (opts, NoBatch, params) =
mapM_ (go NoBatch opts) params
batchseeker (opts, batchmode@(Batch fmt), _) =
batchInput fmt Right (go batchmode opts)
go batchmode opts p =
unlessM (handler opts p) $
@ -46,11 +59,11 @@ batchable handler parser paramdesc = batchseeker <$> batchparser
-- mode, exit on bad input.
batchBadInput :: BatchMode -> Annex ()
batchBadInput NoBatch = liftIO exitFailure
batchBadInput Batch = liftIO $ putStrLn ""
batchBadInput (Batch _) = liftIO $ putStrLn ""
-- Reads lines of batch mode input and passes to the action to handle.
batchInput :: (String -> Either String a) -> (a -> Annex ()) -> Annex ()
batchInput parser a = go =<< batchLines
batchInput :: BatchFormat -> (String -> Either String a) -> (a -> Annex ()) -> Annex ()
batchInput fmt parser a = go =<< batchLines fmt
where
go [] = return ()
go (l:rest) = do
@ -58,8 +71,12 @@ batchInput parser a = go =<< batchLines
go rest
parseerr s = giveup $ "Batch input parse failure: " ++ s
batchLines :: Annex [String]
batchLines = liftIO $ lines <$> getContents
batchLines :: BatchFormat -> Annex [String]
batchLines fmt = liftIO $ splitter <$> getContents
where
splitter = case fmt of
BatchLine -> lines
BatchNull -> splitc '\0'
-- Runs a CommandStart in batch mode.
--
@ -69,22 +86,22 @@ batchLines = liftIO $ lines <$> getContents
-- any output, so in that case, batchBadInput is used to provide the caller
-- with an empty line.
batchCommandAction :: CommandStart -> Annex ()
batchCommandAction a = maybe (batchBadInput Batch) (const noop)
batchCommandAction a = maybe (batchBadInput (Batch BatchLine)) (const noop)
=<< callCommandAction' a
-- Reads lines of batch input and passes the filepaths to a CommandStart
-- to handle them.
--
-- File matching options are not checked.
allBatchFiles :: (FilePath -> CommandStart) -> Annex ()
allBatchFiles a = batchInput Right $ batchCommandAction . a
allBatchFiles :: BatchFormat -> (FilePath -> CommandStart) -> Annex ()
allBatchFiles fmt a = batchInput fmt Right $ batchCommandAction . a
-- Like allBatchFiles, but checks the file matching options
-- and skips non-matching files.
batchFilesMatching :: (FilePath -> CommandStart) -> Annex ()
batchFilesMatching a = do
batchFilesMatching :: BatchFormat -> (FilePath -> CommandStart) -> Annex ()
batchFilesMatching fmt a = do
matcher <- getMatcher
allBatchFiles $ \f ->
allBatchFiles fmt $ \f ->
ifM (matcher $ MatchingFile $ FileInfo f f)
( a f
, return Nothing

View file

@ -59,10 +59,10 @@ seek o = allowConcurrentOutput $ do
)
)
case batchOption o of
Batch
Batch fmt
| updateOnly o ->
giveup "--update --batch is not supported"
| otherwise -> batchFilesMatching gofile
| otherwise -> batchFilesMatching fmt gofile
NoBatch -> do
l <- workTreeItems (addThese o)
let go a = a gofile l

View file

@ -97,7 +97,7 @@ seek :: AddUrlOptions -> CommandSeek
seek o = allowConcurrentOutput $ do
forM_ (addUrls o) (\u -> go (o, u))
case batchOption o of
Batch -> batchInput (parseBatchInput o) go
Batch fmt -> batchInput fmt (parseBatchInput o) go
NoBatch -> noop
where
go (o', u) = do

View file

@ -33,12 +33,12 @@ seek o = case batchOption o of
(ks:rn:[]) -> toRemote rn >>= (check ks . Just) >>= exitResult
(ks:[]) -> check ks Nothing >>= exitResult
_ -> wrongnumparams
Batch -> do
Batch fmt -> do
checker <- case params o of
(rn:[]) -> toRemote rn >>= \r -> return (flip check (Just r))
[] -> return (flip check Nothing)
_ -> wrongnumparams
batchInput Right $ checker >=> batchResult
batchInput fmt Right $ checker >=> batchResult
where
wrongnumparams = giveup "Wrong number of parameters"

View file

@ -47,7 +47,7 @@ seek :: CopyOptions -> CommandSeek
seek o = allowConcurrentOutput $ do
let go = whenAnnexed $ start o
case batchOption o of
Batch -> batchFilesMatching go
Batch fmt -> batchFilesMatching fmt go
NoBatch -> withKeyOptions
(keyOptions o) (autoMode o)
(Command.Move.startKey (fromToOptions o) Command.Move.RemoveNever)

View file

@ -54,7 +54,7 @@ parseDropFromOption = parseRemoteOption <$> strOption
seek :: DropOptions -> CommandSeek
seek o = allowConcurrentOutput $
case batchOption o of
Batch -> batchFilesMatching go
Batch fmt -> batchFilesMatching fmt go
NoBatch -> withKeyOptions (keyOptions o) (autoMode o)
(startKeys o)
(withFilesInGit go)

View file

@ -35,7 +35,7 @@ seek o = do
giveup "dropkey can cause data loss; use --force if you're sure you want to do this"
withKeys start (toDrop o)
case batchOption o of
Batch -> batchInput parsekey $ batchCommandAction . start
Batch fmt -> batchInput fmt parsekey $ batchCommandAction . start
NoBatch -> noop
where
parsekey = maybe (Left "bad key") Right . file2key

View file

@ -51,7 +51,7 @@ parseFormatOption =
seek :: FindOptions -> CommandSeek
seek o = case batchOption o of
NoBatch -> withFilesInGit go =<< workTreeItems (findThese o)
Batch -> batchFilesMatching go
Batch fmt -> batchFilesMatching fmt go
where
go = whenAnnexed $ start o

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2010, 2015 Joey Hess <id@joeyh.name>
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -21,13 +21,26 @@ cmd :: Command
cmd = notDirect $ notBareRepo $
command "fromkey" SectionPlumbing "adds a file using a specific key"
(paramRepeating (paramPair paramKey paramPath))
(withParams seek)
(seek <$$> optParser)
seek :: CmdParams -> CommandSeek
seek [] = withNothing startMass []
seek ps = do
force <- Annex.getState Annex.force
withPairs (start force) ps
data FromKeyOptions = FromKeyOptions
{ keyFilePairs :: CmdParams
, batchOption :: BatchMode
}
optParser :: CmdParamsDesc -> Parser FromKeyOptions
optParser desc = FromKeyOptions
<$> cmdParams desc
<*> parseBatchOption
seek :: FromKeyOptions -> CommandSeek
seek o = case (batchOption o, keyFilePairs o) of
(Batch fmt, _) -> withNothing (startMass fmt) []
-- older way of enabling batch input, does not support BatchNull
(NoBatch, []) -> withNothing (startMass BatchLine) []
(NoBatch, ps) -> do
force <- Annex.getState Annex.force
withPairs (start force) ps
start :: Bool -> (String, FilePath) -> CommandStart
start force (keyname, file) = do
@ -39,13 +52,13 @@ start force (keyname, file) = do
showStart "fromkey" file
next $ perform key file
startMass :: CommandStart
startMass = do
startMass :: BatchFormat -> CommandStart
startMass fmt = do
showStart' "fromkey" (Just "stdin")
next massAdd
next (massAdd fmt)
massAdd :: CommandPerform
massAdd = go True =<< map (separate (== ' ')) <$> batchLines
massAdd :: BatchFormat -> CommandPerform
massAdd fmt = go True =<< map (separate (== ' ')) <$> batchLines fmt
where
go status [] = next $ return status
go status ((keyname,f):rest) | not (null keyname) && not (null f) = do

View file

@ -42,7 +42,7 @@ seek o = allowConcurrentOutput $ do
from <- maybe (pure Nothing) (Just <$$> getParsed) (getFrom o)
let go = whenAnnexed $ start o from
case batchOption o of
Batch -> batchFilesMatching go
Batch fmt -> batchFilesMatching fmt go
NoBatch -> withKeyOptions (keyOptions o) (autoMode o)
(startKeys from)
(withFilesInGit go)

View file

@ -133,7 +133,7 @@ optParser desc = InfoOptions
seek :: InfoOptions -> CommandSeek
seek o = case batchOption o of
NoBatch -> withWords (start o) (infoFor o)
Batch -> batchInput Right (itemInfo o)
Batch fmt -> batchInput fmt Right (itemInfo o)
start :: InfoOptions -> [String] -> CommandStart
start o [] = do

View file

@ -83,10 +83,10 @@ seek o = case batchOption o of
(startKeys c o)
(seeker $ whenAnnexed $ start c o)
=<< workTreeItems (forFiles o)
Batch -> withMessageState $ \s -> case outputType s of
Batch fmt -> withMessageState $ \s -> case outputType s of
JSONOutput _ -> ifM limited
( giveup "combining --batch with file matching options is not currently supported"
, batchInput parseJSONInput $
, batchInput fmt parseJSONInput $
commandAction . startBatch
)
_ -> giveup "--batch is currently only supported in --json mode"

View file

@ -57,7 +57,7 @@ seek :: MoveOptions -> CommandSeek
seek o = allowConcurrentOutput $ do
let go = whenAnnexed $ start (fromToOptions o) (removeWhen o)
case batchOption o of
Batch -> batchFilesMatching go
Batch fmt -> batchFilesMatching fmt go
NoBatch -> withKeyOptions (keyOptions o) False
(startKey (fromToOptions o) (removeWhen o))
(withFilesInGit go)

View file

@ -49,7 +49,7 @@ batchParser s = case separate (== ' ') (reverse s) of
seek :: ReKeyOptions -> CommandSeek
seek o = case batchOption o of
Batch -> batchInput batchParser (batchCommandAction . start)
Batch fmt -> batchInput fmt batchParser (batchCommandAction . start)
NoBatch -> withPairs (start . parsekey) (reKeyThese o)
where
parsekey (file, skey) =

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2015 Joey Hess <id@joeyh.name>
- Copyright 2015-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -19,23 +19,39 @@ cmd = notDirect $ notBareRepo $
command "registerurl"
SectionPlumbing "registers an url for a key"
(paramPair paramKey paramUrl)
(withParams seek)
(seek <$$> optParser)
seek :: CmdParams -> CommandSeek
seek = withWords start
data RegisterUrlOptions = RegisterUrlOptions
{ keyUrlPairs :: CmdParams
, batchOption :: BatchMode
}
optParser :: CmdParamsDesc -> Parser RegisterUrlOptions
optParser desc = RegisterUrlOptions
<$> cmdParams desc
<*> parseBatchOption
seek :: RegisterUrlOptions -> CommandSeek
seek o = case (batchOption o, keyUrlPairs o) of
(Batch fmt, _) -> withNothing (startMass fmt) []
-- older way of enabling batch input, does not support BatchNull
(NoBatch, []) -> withNothing (startMass BatchLine) []
(NoBatch, ps) -> withWords start ps
start :: [String] -> CommandStart
start (keyname:url:[]) = do
let key = mkKey keyname
showStart' "registerurl" (Just url)
next $ perform key url
start [] = do
showStart' "registerurl" (Just "stdin")
next massAdd
start _ = giveup "specify a key and an url"
massAdd :: CommandPerform
massAdd = go True =<< map (separate (== ' ')) <$> batchLines
startMass :: BatchFormat -> CommandStart
startMass fmt = do
showStart' "registerurl" (Just "stdin")
next (massAdd fmt)
massAdd :: BatchFormat -> CommandPerform
massAdd fmt = go True =<< map (separate (== ' ')) <$> batchLines fmt
where
go status [] = next $ return status
go status ((keyname,u):rest) | not (null keyname) && not (null u) = do

View file

@ -30,7 +30,7 @@ optParser desc = RmUrlOptions
seek :: RmUrlOptions -> CommandSeek
seek o = case batchOption o of
Batch -> batchInput batchParser (batchCommandAction . start)
Batch fmt -> batchInput fmt batchParser (batchCommandAction . start)
NoBatch -> withPairs start (rmThese o)
-- Split on the last space, since a FilePath can contain whitespace,

View file

@ -30,7 +30,7 @@ optParser desc = SetPresentKeyOptions
seek :: SetPresentKeyOptions -> CommandSeek
seek o = case batchOption o of
Batch -> batchInput
Batch fmt -> batchInput fmt
(parseKeyStatus . words)
(batchCommandAction . start)
NoBatch -> either giveup (commandAction . start)

View file

@ -40,7 +40,7 @@ seek o = do
m <- remoteMap id
let go = whenAnnexed $ start m
case batchOption o of
Batch -> batchFilesMatching go
Batch fmt -> batchFilesMatching fmt go
NoBatch ->
withKeyOptions (keyOptions o) False
(startKeys m)

View file

@ -81,6 +81,11 @@ annexed content, and other symlinks.
an empty line will be output instead of the normal output produced
when adding a file.
* `-z`
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
# SEE ALSO
[[git-annex]](1)

View file

@ -83,6 +83,11 @@ be used to get better filenames.
Enables batch mode, in which lines containing urls to add are read from
stdin.
* `-z`
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
* `--with-files`
When batch mode is enabled, makes it parse lines of the form: "$url $file"

View file

@ -20,14 +20,19 @@ For example, to force use of the SHA1 backend:
# OPTIONS
* `--backend=name`
Specifies which key-value backend to use.
* `--batch`
Enable batch mode, in which a line containing the filename is read from
stdin, the key is output to stdout (with a trailing newline), and repeat.
* `--backend=name`
* `-z`
Specifies which key-value backend to use.
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
# SEE ALSO

View file

@ -88,6 +88,11 @@ Copies the content of files from or to another remote.
machine-parseable, you may want to use --json in combination with
--batch.
* `-z`
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
* `--json`
Enable JSON output. This is intended to be parsed by programs that use

View file

@ -87,6 +87,11 @@ safe to do so.
match specified matching options, or it is not an annexed file,
a blank line is output in response instead.
* `-z`
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
* `--json`
Enable JSON output. This is intended to be parsed by programs that use

View file

@ -68,6 +68,11 @@ finds files in the current directory and its subdirectories.
or otherwise doesn't meet the matching options, an empty line
will be output instead.
* `-z`
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
# SEE ALSO
[[git-annex]](1)

View file

@ -13,9 +13,8 @@ in the git repository to link to a specified key.
Multiple pairs of file and key can be given in a single command line.
If no key and file pair are specified on the command line, they are
instead read from stdin. Any number of lines can be provided in this
mode, each containing a key and filename, separated by a single space.
If no key and file pair are specified on the command line, batch input
is used, the same as if the --batch option were specified.
Normally the key is a git-annex formatted key. However, to make it easier
to use this to add urls, if the key cannot be parsed as a key, and is a
@ -30,6 +29,19 @@ to do that.
Allow making a file link to a key whose content is not in the local
repository. The key may not be known to git-annex at all.
* `--batch`
In batch input mode, lines are read from stdin, and each line
should contain a key and filename, separated by a single space.
* `-z`
When in batch mode, the input is delimited by nulls instead of the usual
newlines.
(Note that for this to be used, you have to explicitly enable batch mode
with `--batch`)
# SEE ALSO
[[git-annex]](1)

View file

@ -98,6 +98,11 @@ or transferring them from some kind of key-value store.
machine-parseable, you may want to use --json in combination with
--batch.
* `-z`
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
* `--json`
Enable JSON output. This is intended to be parsed by programs that use

View file

@ -40,6 +40,11 @@ for the repository as a whole.
Enable batch mode, in which a line containing an item is read from stdin,
the information about it is output to stdout, and repeat.
* `-z`
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
* file matching options
When a directory is specified, the [[git-annex-matching-options]](1)

View file

@ -23,6 +23,11 @@ nothing is output, and it exits nonzero.
Note that if there is no key corresponding to the file, an empty line is
output to stdout instead.
* `-z`
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
# SEE ALSO
[[git-annex]](1)

View file

@ -88,6 +88,11 @@ Moves the content of files from or to another remote.
machine-parseable, you may want to use --json in combination with
--batch.
* `-z`
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
* `--json`
Enable JSON output. This is intended to be parsed by programs that use

View file

@ -13,14 +13,29 @@ key can be downloaded from.
No verification is performed of the url's contents.
If the key and url are not specified on the command line, they are
instead read from stdin. Any number of lines can be provided in this
mode, each containing a key and url, separated by a single space.
If no key and url pair are specified on the command line,
batch input is used, the same as if the --batch option were
specified.
Normally the key is a git-annex formatted key. However, to make it easier
to use this to add urls, if the key cannot be parsed as a key, and is a
valid url, an URL key is constructed from the url.
# OPTIONS
* `--batch`
In batch input mode, lines are read from stdin, and each line
should contain a key and url, separated by a single space.
* `-z`
When in batch mode, the input is delimited by nulls instead of the usual
newlines.
(Note that for this to be used, you have to explicitly enable batch mode
with `--batch`)
# SEE ALSO
[[git-annex]](1)

View file

@ -26,6 +26,11 @@ Multiple pairs of file and key can be given in a single command line.
Each line should contain the file, and the new key to use for that file,
separated by a single space.
* `-z`
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
# SEE ALSO
[[git-annex]](1)

View file

@ -18,6 +18,11 @@ Record that the file is no longer available at the url.
Each line should contain the file, and the url to remove from that file,
separated by a single space.
* `-z`
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
# SEE ALSO
[[git-annex]](1)

View file

@ -52,6 +52,11 @@ For example:
specified file matching options, an empty line will be
output instead.
* `-z`
Makes the `--batch` input be delimited by nulls instead of the usual
newlines.
* `--json`
Enable JSON output. This is intended to be parsed by programs that use