diff --git a/Annex/Link.hs b/Annex/Link.hs index bac3531b41..a9b4108ebe 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -305,13 +305,20 @@ unableToRestage mf = unwords - valid pointer file. -} parseLinkTargetOrPointer :: S.ByteString -> Maybe Key -parseLinkTargetOrPointer b - | S.length b <= maxValidPointerSz = - let (firstline, rest) = S8.span (/= '\n') b - in case parsekey $ droptrailing '\r' firstline of - Just k | restvalid (dropleading '\n' rest) -> Just k - _ -> Nothing - | otherwise = Nothing +parseLinkTargetOrPointer = either (const Nothing) id + . parseLinkTargetOrPointer' + +data InvalidAppendedPointerFile = InvalidAppendedPointerFile + +parseLinkTargetOrPointer' :: S.ByteString -> Either InvalidAppendedPointerFile (Maybe Key) +parseLinkTargetOrPointer' b = + let (firstline, rest) = S8.span (/= '\n') b + in case parsekey $ droptrailing '\r' firstline of + Just k + | S.length b > maxValidPointerSz -> Left InvalidAppendedPointerFile + | restvalid (dropleading '\n' rest) -> Right (Just k) + | otherwise -> Left InvalidAppendedPointerFile + Nothing -> Right Nothing where parsekey l | isLinkToAnnex l = fileKey $ snd $ S8.breakEnd pathsep l @@ -344,9 +351,13 @@ parseLinkTargetOrPointer b {- Avoid looking at more of the lazy ByteString than necessary since it - could be reading from a large file that is not a pointer file. -} parseLinkTargetOrPointerLazy :: L.ByteString -> Maybe Key -parseLinkTargetOrPointerLazy b = +parseLinkTargetOrPointerLazy = either (const Nothing) id + . parseLinkTargetOrPointerLazy' + +parseLinkTargetOrPointerLazy' :: L.ByteString -> Either InvalidAppendedPointerFile (Maybe Key) +parseLinkTargetOrPointerLazy' b = let b' = L.take (fromIntegral maxPointerSz) b - in parseLinkTargetOrPointer (L.toStrict b') + in parseLinkTargetOrPointer' (L.toStrict b') formatPointer :: Key -> S.ByteString formatPointer k = prefix <> keyFile k <> nl diff --git a/CHANGELOG b/CHANGELOG index 76ca493c55..13306b9802 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,8 @@ git-annex (10.20220223) UNRELEASED; urgency=medium some other content appended to it, and avoid treating it as a pointer file, so that appended content will not be checked into git, but will be annexed like any other file. + * smudge: Warn when encountering a pointer file that has other content + appended to it. -- Joey Hess Wed, 23 Feb 2022 14:14:09 -0400 diff --git a/Command/FilterProcess.hs b/Command/FilterProcess.hs index d53303c6fd..ff20dd7268 100644 --- a/Command/FilterProcess.hs +++ b/Command/FilterProcess.hs @@ -83,7 +83,7 @@ clean file = do -- hash the content provided by git, but Backend does not currently -- have an interface to do so. Command.Smudge.clean' (toRawFilePath file) - (parseLinkTargetOrPointer b) + (parseLinkTargetOrPointer' b) passthrough discardreststdin emitpointer diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 02043341d2..a5d8871998 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2015-2021 Joey Hess + - Copyright 2015-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -105,7 +105,7 @@ clean file = do then L.length b `seq` return () else liftIO $ hClose stdin let emitpointer = liftIO . S.hPut stdout . formatPointer - clean' file (parseLinkTargetOrPointerLazy b) + clean' file (parseLinkTargetOrPointerLazy' b) passthrough discardreststdin emitpointer @@ -115,7 +115,7 @@ clean file = do -- Handles everything except the IO of the file content. clean' :: RawFilePath - -> Maybe Key + -> Either InvalidAppendedPointerFile (Maybe Key) -- ^ If the content provided by git is an annex pointer, -- this is the key it points to. -> Annex () @@ -135,19 +135,26 @@ clean' file mk passthrough discardreststdin emitpointer = where go = case mk of - Just k -> do + Right (Just k) -> do addingExistingLink file k $ do getMoveRaceRecovery k file passthrough - Nothing -> inRepo (Git.Ref.fileRef file) >>= \case - Just fileref -> do - indexmeta <- catObjectMetaData fileref - oldkey <- case indexmeta of - Just (_, sz, _) -> catKey' fileref sz - Nothing -> return Nothing - go' indexmeta oldkey - Nothing -> passthrough - go' indexmeta oldkey = ifM (shouldAnnex file indexmeta oldkey) + Right Nothing -> notpointer + Left InvalidAppendedPointerFile -> do + toplevelWarning False $ + "The file \"" ++ fromRawFilePath file ++ "\" looks like git-annex pointer file that has had other content appended to it" + notpointer + + notpointer = inRepo (Git.Ref.fileRef file) >>= \case + Just fileref -> do + indexmeta <- catObjectMetaData fileref + oldkey <- case indexmeta of + Just (_, sz, _) -> catKey' fileref sz + Nothing -> return Nothing + notpointer' indexmeta oldkey + Nothing -> passthrough + + notpointer' indexmeta oldkey = ifM (shouldAnnex file indexmeta oldkey) ( do discardreststdin diff --git a/doc/bugs/prevent_silent_data_loss_on_unlocked_files.mdwn b/doc/bugs/prevent_silent_data_loss_on_unlocked_files.mdwn index a378101743..ea3e135cef 100644 --- a/doc/bugs/prevent_silent_data_loss_on_unlocked_files.mdwn +++ b/doc/bugs/prevent_silent_data_loss_on_unlocked_files.mdwn @@ -72,3 +72,5 @@ may be situation is even more "dire" because git-annex still considers this file [[!meta author=yoh]] [[!tag projects/datalad]] + +> [[fixed|done]] (at least as far as it can be fixed) --[[Joey]] diff --git a/doc/bugs/prevent_silent_data_loss_on_unlocked_files/comment_5_adb69305a31cf55ff62497b6de8e2f6a._comment b/doc/bugs/prevent_silent_data_loss_on_unlocked_files/comment_5_adb69305a31cf55ff62497b6de8e2f6a._comment new file mode 100644 index 0000000000..362dd8a378 --- /dev/null +++ b/doc/bugs/prevent_silent_data_loss_on_unlocked_files/comment_5_adb69305a31cf55ff62497b6de8e2f6a._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 5""" + date="2022-02-23T19:13:25Z" + content=""" + joey@darkstar:/tmp/r>git add xx + git-annex: The file "xx" looks like git-annex pointer file that has had other content appended to it + +I think this is as far as git-annex can do toward preventing foot shooting here. +"""]]