smudge: Warn when encountering a pointer file that has other content appended to it
It will then proceed to add the file the same as if it were any other file containing possibly annexable content. Usually the file is one that was annexed before, so the new, probably corrupt content will also be added to the annex. If the file was not annexed before, the content will be added to git. It's not possible for the smudge filter to throw an error here, because git then just adds the file to git anyway. Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
parent
67245ae00f
commit
64ccb4734e
6 changed files with 55 additions and 23 deletions
|
@ -305,13 +305,20 @@ unableToRestage mf = unwords
|
|||
- valid pointer file.
|
||||
-}
|
||||
parseLinkTargetOrPointer :: S.ByteString -> Maybe Key
|
||||
parseLinkTargetOrPointer b
|
||||
| S.length b <= maxValidPointerSz =
|
||||
let (firstline, rest) = S8.span (/= '\n') b
|
||||
in case parsekey $ droptrailing '\r' firstline of
|
||||
Just k | restvalid (dropleading '\n' rest) -> Just k
|
||||
_ -> Nothing
|
||||
| otherwise = Nothing
|
||||
parseLinkTargetOrPointer = either (const Nothing) id
|
||||
. parseLinkTargetOrPointer'
|
||||
|
||||
data InvalidAppendedPointerFile = InvalidAppendedPointerFile
|
||||
|
||||
parseLinkTargetOrPointer' :: S.ByteString -> Either InvalidAppendedPointerFile (Maybe Key)
|
||||
parseLinkTargetOrPointer' b =
|
||||
let (firstline, rest) = S8.span (/= '\n') b
|
||||
in case parsekey $ droptrailing '\r' firstline of
|
||||
Just k
|
||||
| S.length b > maxValidPointerSz -> Left InvalidAppendedPointerFile
|
||||
| restvalid (dropleading '\n' rest) -> Right (Just k)
|
||||
| otherwise -> Left InvalidAppendedPointerFile
|
||||
Nothing -> Right Nothing
|
||||
where
|
||||
parsekey l
|
||||
| isLinkToAnnex l = fileKey $ snd $ S8.breakEnd pathsep l
|
||||
|
@ -344,9 +351,13 @@ parseLinkTargetOrPointer b
|
|||
{- Avoid looking at more of the lazy ByteString than necessary since it
|
||||
- could be reading from a large file that is not a pointer file. -}
|
||||
parseLinkTargetOrPointerLazy :: L.ByteString -> Maybe Key
|
||||
parseLinkTargetOrPointerLazy b =
|
||||
parseLinkTargetOrPointerLazy = either (const Nothing) id
|
||||
. parseLinkTargetOrPointerLazy'
|
||||
|
||||
parseLinkTargetOrPointerLazy' :: L.ByteString -> Either InvalidAppendedPointerFile (Maybe Key)
|
||||
parseLinkTargetOrPointerLazy' b =
|
||||
let b' = L.take (fromIntegral maxPointerSz) b
|
||||
in parseLinkTargetOrPointer (L.toStrict b')
|
||||
in parseLinkTargetOrPointer' (L.toStrict b')
|
||||
|
||||
formatPointer :: Key -> S.ByteString
|
||||
formatPointer k = prefix <> keyFile k <> nl
|
||||
|
|
|
@ -4,6 +4,8 @@ git-annex (10.20220223) UNRELEASED; urgency=medium
|
|||
some other content appended to it, and avoid treating it as a pointer
|
||||
file, so that appended content will not be checked into git, but will
|
||||
be annexed like any other file.
|
||||
* smudge: Warn when encountering a pointer file that has other content
|
||||
appended to it.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Wed, 23 Feb 2022 14:14:09 -0400
|
||||
|
||||
|
|
|
@ -83,7 +83,7 @@ clean file = do
|
|||
-- hash the content provided by git, but Backend does not currently
|
||||
-- have an interface to do so.
|
||||
Command.Smudge.clean' (toRawFilePath file)
|
||||
(parseLinkTargetOrPointer b)
|
||||
(parseLinkTargetOrPointer' b)
|
||||
passthrough
|
||||
discardreststdin
|
||||
emitpointer
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2015-2022 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -105,7 +105,7 @@ clean file = do
|
|||
then L.length b `seq` return ()
|
||||
else liftIO $ hClose stdin
|
||||
let emitpointer = liftIO . S.hPut stdout . formatPointer
|
||||
clean' file (parseLinkTargetOrPointerLazy b)
|
||||
clean' file (parseLinkTargetOrPointerLazy' b)
|
||||
passthrough
|
||||
discardreststdin
|
||||
emitpointer
|
||||
|
@ -115,7 +115,7 @@ clean file = do
|
|||
-- Handles everything except the IO of the file content.
|
||||
clean'
|
||||
:: RawFilePath
|
||||
-> Maybe Key
|
||||
-> Either InvalidAppendedPointerFile (Maybe Key)
|
||||
-- ^ If the content provided by git is an annex pointer,
|
||||
-- this is the key it points to.
|
||||
-> Annex ()
|
||||
|
@ -135,19 +135,26 @@ clean' file mk passthrough discardreststdin emitpointer =
|
|||
where
|
||||
|
||||
go = case mk of
|
||||
Just k -> do
|
||||
Right (Just k) -> do
|
||||
addingExistingLink file k $ do
|
||||
getMoveRaceRecovery k file
|
||||
passthrough
|
||||
Nothing -> inRepo (Git.Ref.fileRef file) >>= \case
|
||||
Just fileref -> do
|
||||
indexmeta <- catObjectMetaData fileref
|
||||
oldkey <- case indexmeta of
|
||||
Just (_, sz, _) -> catKey' fileref sz
|
||||
Nothing -> return Nothing
|
||||
go' indexmeta oldkey
|
||||
Nothing -> passthrough
|
||||
go' indexmeta oldkey = ifM (shouldAnnex file indexmeta oldkey)
|
||||
Right Nothing -> notpointer
|
||||
Left InvalidAppendedPointerFile -> do
|
||||
toplevelWarning False $
|
||||
"The file \"" ++ fromRawFilePath file ++ "\" looks like git-annex pointer file that has had other content appended to it"
|
||||
notpointer
|
||||
|
||||
notpointer = inRepo (Git.Ref.fileRef file) >>= \case
|
||||
Just fileref -> do
|
||||
indexmeta <- catObjectMetaData fileref
|
||||
oldkey <- case indexmeta of
|
||||
Just (_, sz, _) -> catKey' fileref sz
|
||||
Nothing -> return Nothing
|
||||
notpointer' indexmeta oldkey
|
||||
Nothing -> passthrough
|
||||
|
||||
notpointer' indexmeta oldkey = ifM (shouldAnnex file indexmeta oldkey)
|
||||
( do
|
||||
discardreststdin
|
||||
|
||||
|
|
|
@ -72,3 +72,5 @@ may be situation is even more "dire" because git-annex still considers this file
|
|||
|
||||
[[!meta author=yoh]]
|
||||
[[!tag projects/datalad]]
|
||||
|
||||
> [[fixed|done]] (at least as far as it can be fixed) --[[Joey]]
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 5"""
|
||||
date="2022-02-23T19:13:25Z"
|
||||
content="""
|
||||
joey@darkstar:/tmp/r>git add xx
|
||||
git-annex: The file "xx" looks like git-annex pointer file that has had other content appended to it
|
||||
|
||||
I think this is as far as git-annex can do toward preventing foot shooting here.
|
||||
"""]]
|
Loading…
Add table
Reference in a new issue