From 38e9ea8497bb2ab058e5bd46a666857789c0a84d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 13 Mar 2023 13:56:06 -0400 Subject: [PATCH] one-way escaping of newlines in uuid.log A repository can have a newline in its description due to being in a directory whose name contains a newline, or due to git-annex describe being passed a string with a newline in it for some reason. Putting that newline in uuid.log breaks its format. So, escape the newline to \n when it enters uuid.log. This is a one-way escaping: it is not converted back to a newline when reading the log. If it were, commands like git-annex info and whereis would display a multi-line description, which could be confusing to read. And, implementing roundtripping would necessarily cause problems if an old version of git-annex were used to set a description that contained whatever special character is used to escape the \n. E.g., a \ character, or the ! character if it used the ! prefix that marks base64 data in some other logs. Then the description set by the old git-annex would not roundtrip. There just doesn't seem to be any benefit to roundtripping newlines, so why bother? And, git often displays \n for newline when a filename contains a newline, so git-annex doing it in this case seems sorta ok by analogy to git. (Some other git-annex logs can also have newlines put into them if the user really wants to break git-annex. For example: git-annex config annex.largefiles "foo bar" The full list is probably config.log, remote.log, group.log, preferred-content.log, required-content.log, group-preferred-content.log, schedule.log. Probably there is no good reason to use a newline in any of these, and the breakage is probably limited to the bad data the user put in not coming back out. And users can write any garbage to log files themselves manually in any case. So, I am not going to address all of those at this time. 
If a problem such as this one with the newline in the repository path comes up, it can be dealt with on a case by case basis.) Sponsored-by: Dartmouth College's Datalad project --- CHANGELOG | 1 + Logs/UUID.hs | 15 +++++++++++++-- Types/UUID.hs | 3 --- ...ails_in_a_folder_with_newline_in_its_name.mdwn | 2 ++ ...nt_1_2690ed9441685068c291a182d39c2616._comment | 6 ------ 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index e5202600f8..649e54f877 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -12,6 +12,7 @@ git-annex (10.20230228) UNRELEASED; urgency=medium whose kernels are too old to support kernels used by the current arm64 build. * importfeed: Display feed title. + * init: Support being ran in a repository that has a newline in its path. -- Joey Hess Mon, 27 Feb 2023 12:31:14 -0400 diff --git a/Logs/UUID.hs b/Logs/UUID.hs index 6681860da7..1a3142e198 100644 --- a/Logs/UUID.hs +++ b/Logs/UUID.hs @@ -2,11 +2,13 @@ - - uuid.log stores a list of known uuids, and their descriptions. - - - Copyright 2010-2019 Joey Hess + - Copyright 2010-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE OverloadedStrings #-} + module Logs.UUID ( uuidLog, describeUUID, @@ -25,15 +27,24 @@ import Logs.UUIDBased import qualified Annex.UUID import qualified Data.Map.Strict as M +import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as L import qualified Data.Attoparsec.ByteString.Lazy as A +import Data.ByteString.Builder +import Data.Char {- Records a description for a uuid in the log. -} describeUUID :: UUID -> UUIDDesc -> Annex () describeUUID uuid desc = do c <- currentVectorClock Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) uuidLog $ - buildLogOld buildUUIDDesc . changeLog c uuid desc . parseUUIDLog + buildLogOld builder . changeLog c uuid desc . 
parseUUIDLog + where + builder (UUIDDesc b) = byteString (escnewline b) + -- Escape any newline in the description, since newlines cannot + -- be present in the logged value. This is a one-way escaping. + escnewline = B.intercalate "\\n" . B.split newline + newline = fromIntegral (ord '\n') {- The map is cached for speed. -} uuidDescMap :: Annex UUIDDescMap diff --git a/Types/UUID.hs b/Types/UUID.hs index 3e59fc685c..08f7f8b04a 100644 --- a/Types/UUID.hs +++ b/Types/UUID.hs @@ -82,9 +82,6 @@ fromUUIDDesc (UUIDDesc d) = decodeBS d toUUIDDesc :: String -> UUIDDesc toUUIDDesc = UUIDDesc . encodeBS -buildUUIDDesc :: UUIDDesc -> Builder -buildUUIDDesc (UUIDDesc b) = byteString b - type UUIDDescMap = M.Map UUID UUIDDesc instance Proto.Serializable UUID where diff --git a/doc/bugs/init_fails_in_a_folder_with_newline_in_its_name.mdwn b/doc/bugs/init_fails_in_a_folder_with_newline_in_its_name.mdwn index cfb2c55bdc..0a071a6357 100644 --- a/doc/bugs/init_fails_in_a_folder_with_newline_in_its_name.mdwn +++ b/doc/bugs/init_fails_in_a_folder_with_newline_in_its_name.mdwn @@ -32,3 +32,5 @@ git-annex version: 10.20230214+git26-g8f2829e646-1~ndall+1 as `git` doesn't mind, and now annex batched commands support `-z` already for filenames with newlines in them, I think git-annex should tolerate repository folders with newlines in them too. 
[[!tag projects/datalad]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/init_fails_in_a_folder_with_newline_in_its_name/comment_1_2690ed9441685068c291a182d39c2616._comment b/doc/bugs/init_fails_in_a_folder_with_newline_in_its_name/comment_1_2690ed9441685068c291a182d39c2616._comment index 201c6f93c4..949bee65a5 100644 --- a/doc/bugs/init_fails_in_a_folder_with_newline_in_its_name/comment_1_2690ed9441685068c291a182d39c2616._comment +++ b/doc/bugs/init_fails_in_a_folder_with_newline_in_its_name/comment_1_2690ed9441685068c291a182d39c2616._comment @@ -14,10 +14,4 @@ written to uuid.log contains a newline, which prevents parsing that line of the log correctly. This can also be seen by passing a value with a newline to `git-annex describe`. It would also happen in the case with the newline directory if it didn't fail earlier. - -Another log file that has a similar problem BTW is config.log, -which can get a newline in a value with eg -`git annex config --set annex.largefiles "xxx\nyyy"` -and the result is that reading the value back out omits -the part after the newline. """]]