smudge: Warn when encountering a pointer file that has other content appended to it

After warning, the smudge filter will proceed to add the file the same as if it were any other
file containing possibly annexable content. Usually the file is one that
was annexed before, so the new, probably corrupt content will also be added
to the annex. If the file was not annexed before, the content will be added
to git.

It's not possible for the smudge filter to throw an error here, because
git then just adds the file to git anyway.

Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
Joey Hess 2022-02-23 15:17:08 -04:00
parent 67245ae00f
commit 64ccb4734e
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 55 additions and 23 deletions

View file

@ -305,13 +305,20 @@ unableToRestage mf = unwords
- valid pointer file.
-}
parseLinkTargetOrPointer :: S.ByteString -> Maybe Key
parseLinkTargetOrPointer b
| S.length b <= maxValidPointerSz =
parseLinkTargetOrPointer = either (const Nothing) id
. parseLinkTargetOrPointer'
data InvalidAppendedPointerFile = InvalidAppendedPointerFile
parseLinkTargetOrPointer' :: S.ByteString -> Either InvalidAppendedPointerFile (Maybe Key)
parseLinkTargetOrPointer' b =
let (firstline, rest) = S8.span (/= '\n') b
in case parsekey $ droptrailing '\r' firstline of
Just k | restvalid (dropleading '\n' rest) -> Just k
_ -> Nothing
| otherwise = Nothing
Just k
| S.length b > maxValidPointerSz -> Left InvalidAppendedPointerFile
| restvalid (dropleading '\n' rest) -> Right (Just k)
| otherwise -> Left InvalidAppendedPointerFile
Nothing -> Right Nothing
where
parsekey l
| isLinkToAnnex l = fileKey $ snd $ S8.breakEnd pathsep l
@ -344,9 +351,13 @@ parseLinkTargetOrPointer b
{- Avoid looking at more of the lazy ByteString than necessary since it
- could be reading from a large file that is not a pointer file. -}
parseLinkTargetOrPointerLazy :: L.ByteString -> Maybe Key
parseLinkTargetOrPointerLazy b =
parseLinkTargetOrPointerLazy = either (const Nothing) id
. parseLinkTargetOrPointerLazy'
parseLinkTargetOrPointerLazy' :: L.ByteString -> Either InvalidAppendedPointerFile (Maybe Key)
parseLinkTargetOrPointerLazy' b =
let b' = L.take (fromIntegral maxPointerSz) b
in parseLinkTargetOrPointer (L.toStrict b')
in parseLinkTargetOrPointer' (L.toStrict b')
formatPointer :: Key -> S.ByteString
formatPointer k = prefix <> keyFile k <> nl

View file

@ -4,6 +4,8 @@ git-annex (10.20220223) UNRELEASED; urgency=medium
some other content appended to it, and avoid treating it as a pointer
file, so that appended content will not be checked into git, but will
be annexed like any other file.
* smudge: Warn when encountering a pointer file that has other content
appended to it.
-- Joey Hess <id@joeyh.name> Wed, 23 Feb 2022 14:14:09 -0400

View file

@ -83,7 +83,7 @@ clean file = do
-- hash the content provided by git, but Backend does not currently
-- have an interface to do so.
Command.Smudge.clean' (toRawFilePath file)
(parseLinkTargetOrPointer b)
(parseLinkTargetOrPointer' b)
passthrough
discardreststdin
emitpointer

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
- Copyright 2015-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -105,7 +105,7 @@ clean file = do
then L.length b `seq` return ()
else liftIO $ hClose stdin
let emitpointer = liftIO . S.hPut stdout . formatPointer
clean' file (parseLinkTargetOrPointerLazy b)
clean' file (parseLinkTargetOrPointerLazy' b)
passthrough
discardreststdin
emitpointer
@ -115,7 +115,7 @@ clean file = do
-- Handles everything except the IO of the file content.
clean'
:: RawFilePath
-> Maybe Key
-> Either InvalidAppendedPointerFile (Maybe Key)
-- ^ If the content provided by git is an annex pointer,
-- this is the key it points to.
-> Annex ()
@ -135,19 +135,26 @@ clean' file mk passthrough discardreststdin emitpointer =
where
go = case mk of
Just k -> do
Right (Just k) -> do
addingExistingLink file k $ do
getMoveRaceRecovery k file
passthrough
Nothing -> inRepo (Git.Ref.fileRef file) >>= \case
Right Nothing -> notpointer
Left InvalidAppendedPointerFile -> do
toplevelWarning False $
"The file \"" ++ fromRawFilePath file ++ "\" looks like git-annex pointer file that has had other content appended to it"
notpointer
notpointer = inRepo (Git.Ref.fileRef file) >>= \case
Just fileref -> do
indexmeta <- catObjectMetaData fileref
oldkey <- case indexmeta of
Just (_, sz, _) -> catKey' fileref sz
Nothing -> return Nothing
go' indexmeta oldkey
notpointer' indexmeta oldkey
Nothing -> passthrough
go' indexmeta oldkey = ifM (shouldAnnex file indexmeta oldkey)
notpointer' indexmeta oldkey = ifM (shouldAnnex file indexmeta oldkey)
( do
discardreststdin

View file

@ -72,3 +72,5 @@ may be situation is even more "dire" because git-annex still considers this file
[[!meta author=yoh]]
[[!tag projects/datalad]]
> [[fixed|done]] (at least as far as it can be fixed) --[[Joey]]

View file

@ -0,0 +1,10 @@
[[!comment format=mdwn
username="joey"
subject="""comment 5"""
date="2022-02-23T19:13:25Z"
content="""
joey@darkstar:/tmp/r>git add xx
git-annex: The file "xx" looks like git-annex pointer file that has had other content appended to it
I think this is as far as git-annex can go toward preventing foot shooting here.
"""]]