From eba68572dc70342aaf2f9e2448ae140ae643e63d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 27 May 2016 11:45:13 -0400 Subject: [PATCH] Split lines in the git-annex branch on \r as well as \n, to deal with \r\n terminated lines written by some versions of git-annex on Windows. This fixes strange displays in some cases, including whereis showing many duplicate locations, and showing more total copies than actually exist. It's unknown if that led to data loss when eg, dropping. At the moment, it seems unlikely it could, since the UUID with \r's appended is not the same as a UUID without, and so no remote matches it. It's also unknown if \r's can leak in on Windows, perhaps when merging the git-annex branch. --- CHANGELOG | 3 ++ COPYRIGHT | 32 ++++++++++++ Logs/Line.hs | 51 +++++++++++++++++++ Logs/MapLog.hs | 3 +- Logs/Presence/Pure.hs | 3 +- Logs/SingleValue.hs | 3 +- Logs/Transitions.hs | 3 +- Logs/UUIDBased.hs | 3 +- ..._2355fd90d4ff8dd440bf11348a8daa73._comment | 10 ++++ 9 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 Logs/Line.hs create mode 100644 doc/bugs/Whereis_reports_same_UUID_multiple_times/comment_5_2355fd90d4ff8dd440bf11348a8daa73._comment diff --git a/CHANGELOG b/CHANGELOG index e58c6eedaf..0c5b6db83b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,8 @@ git-annex (6.20160512) UNRELEASED; urgency=medium + * Split lines in the git-annex branch on \r as well as \n, to deal + with \r\n terminated lines written by some versions of git-annex on + Windows. This fixes strange displays in some cases. * Change git annex info remote encryption description to use wording closer to what's used in initremote. * webapp: Avoid confusing display of dead remotes. diff --git a/COPYRIGHT b/COPYRIGHT index 6620962b6b..119bbf6c61 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -95,6 +95,38 @@ License: MIT-twitter OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+Files: Logs/Line.hs +Copyright: 2001, The University Court of the University of Glasgow. +License: other + All rights reserved. + . + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + . + - Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + . + - Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + . + - Neither name of the University nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + . + THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY COURT OF THE UNIVERSITY OF + GLASGOW AND THE CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + UNIVERSITY COURT OF THE UNIVERSITY OF GLASGOW OR THE CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + DAMAGE. 
+ License: GPL-3+ The full text of version 3 of the GPL is distributed as doc/license/GPL in this package's source, or in /usr/share/common-licenses/GPL-3 on diff --git a/Logs/Line.hs b/Logs/Line.hs new file mode 100644 index 0000000000..a7e17190e2 --- /dev/null +++ b/Logs/Line.hs @@ -0,0 +1,51 @@ +{- + +The Glasgow Haskell Compiler License + +Copyright 2001, The University Court of the University of Glasgow. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +- Neither name of the University nor the names of its contributors may be +used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY COURT OF THE UNIVERSITY OF +GLASGOW AND THE CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +UNIVERSITY COURT OF THE UNIVERSITY OF GLASGOW OR THE CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. 
+ +-} + +module Logs.Line where + +-- This is the same as Data.List.lines, with \r added. +-- This works around some versions of git-annex which wrote \r +-- into git-annex branch files on Windows. Those \r's sometimes +-- accumulated over time, so a single line could end with multiple \r's +-- before the \n. +splitLines :: String -> [String] +splitLines "" = [] +splitLines s = cons (case break (\c -> c == '\n' || c == '\r') s of + (l, s') -> (l, case s' of + [] -> [] + _:s'' -> splitLines s'')) + where + cons ~(h, t) = h : t diff --git a/Logs/MapLog.hs b/Logs/MapLog.hs index d5bb67f686..097439ac58 100644 --- a/Logs/MapLog.hs +++ b/Logs/MapLog.hs @@ -18,6 +18,7 @@ import Data.Time.Clock.POSIX import Common import Logs.TimeStamp +import Logs.Line data TimeStamp = Unknown | Date POSIXTime deriving (Eq, Ord, Show) @@ -38,7 +39,7 @@ showMapLog fieldshower valueshower = unlines . map showpair . M.toList unwords ["0", fieldshower f, valueshower v] parseMapLog :: Ord f => (String -> Maybe f) -> (String -> Maybe v) -> String -> MapLog f v -parseMapLog fieldparser valueparser = M.fromListWith best . mapMaybe parse . lines +parseMapLog fieldparser valueparser = M.fromListWith best . mapMaybe parse . splitLines where parse line = do let (ts, rest) = splitword line diff --git a/Logs/Presence/Pure.hs b/Logs/Presence/Pure.hs index e2ec3f13d3..7955c8da31 100644 --- a/Logs/Presence/Pure.hs +++ b/Logs/Presence/Pure.hs @@ -12,6 +12,7 @@ import qualified Data.Map as M import Annex.Common import Logs.TimeStamp +import Logs.Line import Utility.QuickCheck data LogLine = LogLine { @@ -25,7 +26,7 @@ data LogStatus = InfoPresent | InfoMissing | InfoDead {- Parses a log file. Unparseable lines are ignored. -} parseLog :: String -> [LogLine] -parseLog = mapMaybe parseline . lines +parseLog = mapMaybe parseline . 
splitLines where parseline l = LogLine <$> parsePOSIXTime d diff --git a/Logs/SingleValue.hs b/Logs/SingleValue.hs index 9b1306c985..201e205eb2 100644 --- a/Logs/SingleValue.hs +++ b/Logs/SingleValue.hs @@ -16,6 +16,7 @@ module Logs.SingleValue where import Annex.Common import qualified Annex.Branch import Logs.TimeStamp +import Logs.Line import qualified Data.Set as S import Data.Time.Clock.POSIX @@ -37,7 +38,7 @@ showLog = unlines . map showline . S.toList showline (LogEntry t v) = unwords [show t, serialize v] parseLog :: (Ord v, SingleValueSerializable v) => String -> Log v -parseLog = S.fromList . mapMaybe parse . lines +parseLog = S.fromList . mapMaybe parse . splitLines where parse line = do let (ts, s) = splitword line diff --git a/Logs/Transitions.hs b/Logs/Transitions.hs index 5440047688..07667c4074 100644 --- a/Logs/Transitions.hs +++ b/Logs/Transitions.hs @@ -19,6 +19,7 @@ import qualified Data.Set as S import Annex.Common import Logs.TimeStamp +import Logs.Line transitionsLog :: FilePath transitionsLog = "transitions.log" @@ -50,7 +51,7 @@ showTransitions = unlines . map showTransitionLine . S.elems {- If the log contains new transitions we don't support, returns Nothing. -} parseTransitions :: String -> Maybe Transitions -parseTransitions = check . map parseTransitionLine . lines +parseTransitions = check . map parseTransitionLine . splitLines where check l | all isJust l = Just $ S.fromList $ catMaybes l diff --git a/Logs/UUIDBased.hs b/Logs/UUIDBased.hs index 5613c6fb41..97ecd10113 100644 --- a/Logs/UUIDBased.hs +++ b/Logs/UUIDBased.hs @@ -35,6 +35,7 @@ import Common import Types.UUID import Logs.MapLog import Logs.TimeStamp +import Logs.Line type Log v = MapLog UUID v @@ -50,7 +51,7 @@ parseLog :: (String -> Maybe a) -> String -> Log a parseLog = parseLogWithUUID . const parseLogWithUUID :: (UUID -> String -> Maybe a) -> String -> Log a -parseLogWithUUID parser = M.fromListWith best . mapMaybe parse . 
lines +parseLogWithUUID parser = M.fromListWith best . mapMaybe parse . splitLines where parse line -- This is a workaround for a bug that caused diff --git a/doc/bugs/Whereis_reports_same_UUID_multiple_times/comment_5_2355fd90d4ff8dd440bf11348a8daa73._comment b/doc/bugs/Whereis_reports_same_UUID_multiple_times/comment_5_2355fd90d4ff8dd440bf11348a8daa73._comment new file mode 100644 index 0000000000..5ae6c2b1f4 --- /dev/null +++ b/doc/bugs/Whereis_reports_same_UUID_multiple_times/comment_5_2355fd90d4ff8dd440bf11348a8daa73._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2016-05-27T15:40:16Z" + content=""" +I've developed a patch which yields a good whereis display in this repo. + +Still remains to be seen if there's some code path that currently causes +'\r' to get added in the current version of git-annex on Windows. +"""]]