From d44fb89d4f4d3120a34a192025ee901c064cab00 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 2 Feb 2020 16:01:46 -0400 Subject: [PATCH] fix Arbitrary AssociatedFile to not crash when LANG=C Even letting through things that Data.Char.generalCategory said were UppercaseLetter caused the crash. Apparently what's going on is that, in LANG=C, it does not expect to find unicode chars in a String, except presumably ones that are surrogates. But ascii is good enough to test the things we need to test about associated files. --- Key.hs | 11 +++++-- ...ead__95__write__95__transferinfo_test.mdwn | 2 ++ ..._e9db58f71eedc99ccfd7a7a446843316._comment | 31 +++++++++++++++++++ 3 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test/comment_1_e9db58f71eedc99ccfd7a7a446843316._comment diff --git a/Key.hs b/Key.hs index 8b8ca96b2a..2c2f20cd92 100644 --- a/Key.hs +++ b/Key.hs @@ -1,6 +1,6 @@ {- git-annex Keys - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -28,6 +28,7 @@ module Key ( prop_isomorphic_key_encode ) where +import Data.Char import qualified Data.Text as T import qualified Data.ByteString as S import qualified Data.Attoparsec.ByteString as A @@ -79,11 +80,15 @@ instance Arbitrary KeyData where <*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative -- AssociatedFile cannot be empty, and cannot contain a NUL --- (but can be Nothing) +-- (but can be Nothing). instance Arbitrary AssociatedFile where - arbitrary = (AssociatedFile . fmap toRawFilePath <$> arbitrary) + arbitrary = (AssociatedFile . fmap conv <$> arbitrary) `suchThat` (/= AssociatedFile (Just S.empty)) `suchThat` (\(AssociatedFile f) -> maybe True (S.notElem 0) f) + where + -- Generating arbitrary unicode leads to encoding errors + -- when LANG=C, so limit to ascii. + conv = toRawFilePath . 
filter isAscii instance Arbitrary Key where arbitrary = mkKey . const <$> arbitrary diff --git a/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test.mdwn b/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test.mdwn index bcfea264f2..9ff3429c8f 100644 --- a/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test.mdwn +++ b/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test.mdwn @@ -18,3 +18,5 @@ Full build logs are at http://neuro.debian.net/_files/_buildlogs/git-annex/7.201 [[!meta author=yoh]] [[!tag projects/datalad]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test/comment_1_e9db58f71eedc99ccfd7a7a446843316._comment b/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test/comment_1_e9db58f71eedc99ccfd7a7a446843316._comment new file mode 100644 index 0000000000..86f92e591b --- /dev/null +++ b/doc/bugs/build_of_7.20191230+git152-gefb981388_fails_the_prop__95__read__95__write__95__transferinfo_test/comment_1_e9db58f71eedc99ccfd7a7a446843316._comment @@ -0,0 +1,31 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2020-02-02T19:41:34Z" + content=""" +Minimal reproducer: + + bash$ LANG=C ghci Utility/FileSystemEncoding.hs + ghci> useFileSystemEncoding + ghci> toRawFilePath "\611584" + "*** Exception: recoverEncode: invalid argument (invalid character) + +No such problem in a unicode locale. + +The problem does not, though, affect actually using git-annex in LANG=C +with a filename with that in its name. + +Odd because the filesystem encoding is supposed to round-trip anything well, +but here encoding a string with it is failing internally. 
+Maybe the thing is, it's not really round-tripping? QuickCheck arbitrary +magics up a FilePath that contains that, so it's starting in the middle and +trying to convert it out. + +[[!commit 70395659db9f662e61009d984fc9b0b2f24fdece]] introduced this while +fixing another intermittent encoding test case failure. + + ghci> Data.Char.generalCategory '\611584' + NotAssigned + +I think it would make sense to filter out NotAssigned and PrivateUse. +"""]]