154 lines
5 KiB
Haskell
154 lines
5 KiB
Haskell
{- git-annex command
|
|
-
|
|
- Copyright 2015-2018 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
-}
|
|
|
|
module Command.Smudge where
|
|
|
|
import Command
|
|
import qualified Annex
|
|
import Annex.Content
|
|
import Annex.Link
|
|
import Annex.FileMatcher
|
|
import Annex.Ingest
|
|
import Annex.CatFile
|
|
import Logs.Location
|
|
import qualified Database.Keys
|
|
import qualified Git.BuildVersion
|
|
import Git.FilePath
|
|
import qualified Git.Ref
|
|
import Backend
|
|
|
|
import qualified Data.ByteString.Lazy as B
|
|
|
|
cmd :: Command
|
|
cmd = noCommit $ noMessages $
|
|
command "smudge" SectionPlumbing
|
|
"git smudge filter"
|
|
paramFile (seek <$$> optParser)
|
|
|
|
data SmudgeOptions = SmudgeOptions
|
|
{ smudgeFile :: FilePath
|
|
, cleanOption :: Bool
|
|
}
|
|
|
|
optParser :: CmdParamsDesc -> Parser SmudgeOptions
|
|
optParser desc = SmudgeOptions
|
|
<$> argument str ( metavar desc )
|
|
<*> switch ( long "clean" <> help "clean filter" )
|
|
|
|
seek :: SmudgeOptions -> CommandSeek
|
|
seek o = commandAction $
|
|
(if cleanOption o then clean else smudge) (smudgeFile o)
|
|
|
|
-- Smudge filter is fed git file content, and if it's a pointer to an
|
|
-- available annex object, should output its content.
|
|
smudge :: FilePath -> CommandStart
|
|
smudge file = do
|
|
b <- liftIO $ B.hGetContents stdin
|
|
case parseLinkOrPointer b of
|
|
Nothing -> liftIO $ B.putStr b
|
|
Just k -> do
|
|
Database.Keys.addAssociatedFile k =<< inRepo (toTopFilePath file)
|
|
-- A previous unlocked checkout of the file may have
|
|
-- led to the annex object getting modified;
|
|
-- don't provide such modified content as it
|
|
-- will be confusing. inAnnex will detect such
|
|
-- modifications.
|
|
ifM (inAnnex k)
|
|
( do
|
|
content <- calcRepo (gitAnnexLocation k)
|
|
whenM (annexThin <$> Annex.getGitConfig) $
|
|
warning $ "Not able to honor annex.thin when git is checking out " ++ file ++ " (run git annex fix to re-thin files)"
|
|
liftIO $ B.putStr . fromMaybe b
|
|
=<< catchMaybeIO (B.readFile content)
|
|
, liftIO $ B.putStr b
|
|
)
|
|
stop
|
|
|
|
-- Clean filter is fed file content on stdin, decides if a file
|
|
-- should be stored in the annex, and outputs a pointer to its
|
|
-- injested content if so. Otherwise, the original content.
|
|
clean :: FilePath -> CommandStart
|
|
clean file = do
|
|
b <- liftIO $ B.hGetContents stdin
|
|
case parseLinkOrPointer b of
|
|
Just k -> do
|
|
getMoveRaceRecovery k file
|
|
liftIO $ B.hPut stdout b
|
|
Nothing -> go b =<< catKeyFile file
|
|
stop
|
|
where
|
|
go b oldkey = ifM (shouldAnnex file oldkey)
|
|
( do
|
|
-- Before git 2.5, failing to consume all stdin here
|
|
-- would cause a SIGPIPE and crash it.
|
|
-- Newer git catches the signal and stops sending,
|
|
-- which is much faster. (Also, git seems to forget
|
|
-- to free memory when sending the file, so the
|
|
-- less we let it send, the less memory it will waste.)
|
|
if Git.BuildVersion.older "2.5"
|
|
then B.length b `seq` return ()
|
|
else liftIO $ hClose stdin
|
|
-- Look up the backend that was used for this file
|
|
-- before, so that when git re-cleans a file its
|
|
-- backend does not change.
|
|
let oldbackend = maybe Nothing (maybeLookupBackendVariety . keyVariety) oldkey
|
|
-- Can't restage associated files because git add
|
|
-- runs this and has the index locked.
|
|
let norestage = Restage False
|
|
liftIO . emitPointer
|
|
=<< postingest
|
|
=<< (\ld -> ingest' oldbackend ld Nothing norestage)
|
|
=<< lockDown cfg file
|
|
, liftIO $ B.hPut stdout b
|
|
)
|
|
|
|
postingest (Just k, _) = do
|
|
logStatus k InfoPresent
|
|
return k
|
|
postingest _ = error "could not add file to the annex"
|
|
|
|
cfg = LockDownConfig
|
|
{ lockingFile = False
|
|
, hardlinkFileTmp = False
|
|
}
|
|
|
|
-- New files are annexed as configured by annex.largefiles, with a default
|
|
-- of annexing them.
|
|
--
|
|
-- If annex.largefiles is not configured for a file, and a file with its
|
|
-- name is already in the index, preserve its annexed/not annexed state.
|
|
-- This prevents accidental conversions when annex.largefiles is being
|
|
-- set/unset on the fly rather than being set in gitattributes or .git/config.
|
|
shouldAnnex :: FilePath -> Maybe Key -> Annex Bool
|
|
shouldAnnex file moldkey = do
|
|
matcher <- largeFilesMatcher
|
|
checkFileMatcher' matcher file whenempty
|
|
where
|
|
whenempty = case moldkey of
|
|
Just _ -> return True
|
|
Nothing -> isNothing <$> catObjectMetaData (Git.Ref.fileRef file)
|
|
|
|
emitPointer :: Key -> IO ()
|
|
emitPointer = putStr . formatPointer
|
|
|
|
-- Recover from a previous race between eg git mv and git-annex get.
|
|
-- That could result in the file remaining a pointer file, while
|
|
-- its content is present in the annex. Populate the pointer file.
|
|
--
|
|
-- This also handles the case where a copy of a pointer file is made,
|
|
-- then git-annex gets the content, and later git add is run on
|
|
-- the pointer copy. It will then be populated with the content.
|
|
getMoveRaceRecovery :: Key -> FilePath -> Annex ()
|
|
getMoveRaceRecovery k file = void $ tryNonAsync $
|
|
liftIO (isPointerFile file) >>= \k' -> when (Just k == k') $
|
|
whenM (inAnnex k) $ do
|
|
obj <- calcRepo (gitAnnexLocation k)
|
|
-- Cannot restage because git add is running and has
|
|
-- the index locked.
|
|
populatePointerFile (Restage False) k obj file >>= \case
|
|
Nothing -> return ()
|
|
Just ic -> Database.Keys.addInodeCaches k [ic]
|