git-annex/Command/Smudge.hs

155 lines
5 KiB
Haskell
Raw Normal View History

2015-12-04 17:02:56 +00:00
{- git-annex command
 -
 - Copyright 2015-2018 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}
module Command.Smudge where
import Command
import qualified Annex
import Annex.Content
import Annex.Link
import Annex.FileMatcher
import Annex.Ingest
import Annex.CatFile
import Logs.Location
import qualified Database.Keys
import qualified Git.BuildVersion
import Git.FilePath
2018-08-28 17:17:06 +00:00
import qualified Git.Ref
import Backend
2015-12-04 18:03:10 +00:00
import qualified Data.ByteString.Lazy as B
2015-12-04 17:02:56 +00:00
-- The "smudge" plumbing command. Its positional parameter is described
-- by paramFile, its options are parsed by optParser, and the parsed
-- options are handed to seek. The whole command is wrapped in noCommit
-- and noMessages.
cmd :: Command
cmd = noCommit . noMessages $ smudgeCommand
  where
    smudgeCommand = command "smudge" SectionPlumbing
        "git smudge filter"
        paramFile
        (seek <$$> optParser)
2015-12-04 17:02:56 +00:00
-- Options accepted by the smudge command.
data SmudgeOptions = SmudgeOptions
    { smudgeFile :: FilePath
    -- ^ The file named on the command line; passed to whichever
    -- filter is run.
    , cleanOption :: Bool
    -- ^ Set by the --clean switch. When True the clean filter is run,
    -- otherwise the smudge filter is run.
    }
2015-12-04 17:02:56 +00:00
-- Command line parser for SmudgeOptions: one positional argument (shown
-- in usage using the supplied description) followed by the optional
-- --clean switch.
optParser :: CmdParamsDesc -> Parser SmudgeOptions
optParser desc = SmudgeOptions <$> fileArgument <*> cleanSwitch
  where
    fileArgument = argument str (metavar desc)
    cleanSwitch = switch (long "clean" <> help "clean filter")
-- Runs the clean filter when --clean was given, and the smudge filter
-- otherwise, on the file named in the options.
seek :: SmudgeOptions -> CommandSeek
seek o = commandAction (runFilter (smudgeFile o))
  where
    runFilter
        | cleanOption o = clean
        | otherwise = smudge
2015-12-04 21:18:26 +00:00
-- The smudge filter reads git's version of a file from stdin. When that
-- is a pointer to an annex object whose content is available, the
-- object's content is written to stdout; in every other case the input
-- is written back out unchanged.
smudge :: FilePath -> CommandStart
smudge file = do
    input <- liftIO (B.hGetContents stdin)
    maybe (passThrough input) (provideContent input) (parseLinkOrPointer input)
    stop
  where
    -- Echo the filter input to stdout as-is.
    passThrough content = liftIO (B.putStr content)

    provideContent input key = do
        topfile <- inRepo (toTopFilePath file)
        Database.Keys.addAssociatedFile key topfile
        -- An earlier unlocked checkout of the file may have resulted
        -- in the annex object being modified. Handing out such
        -- modified content would be confusing, so rely on inAnnex,
        -- which detects that kind of modification.
        present <- inAnnex key
        if present
            then do
                objectfile <- calcRepo (gitAnnexLocation key)
                thin <- annexThin <$> Annex.getGitConfig
                when thin $
                    warning $ "Not able to honor annex.thin when git is checking out " ++ file ++ " (run git annex fix to re-thin files)"
                -- Fall back to the pointer itself if the object
                -- cannot be read.
                liftIO $ do
                    mcontent <- catchMaybeIO (B.readFile objectfile)
                    B.putStr (fromMaybe input mcontent)
            else passThrough input
-- The clean filter reads a file's content from stdin and decides whether
-- the file should be stored in the annex. If so, the content is ingested
-- and a pointer to it is written to stdout; otherwise the original
-- content is written to stdout. Input that already is a pointer is
-- passed through unchanged.
clean :: FilePath -> CommandStart
clean file = do
    input <- liftIO (B.hGetContents stdin)
    case parseLinkOrPointer input of
        Nothing -> catKeyFile file >>= cleanContent input
        Just key -> do
            getMoveRaceRecovery key file
            passThrough input
    stop
  where
    -- Echo the filter input to stdout as-is.
    passThrough content = liftIO (B.hPut stdout content)

    cleanContent input oldkey = do
        annexit <- shouldAnnex file oldkey
        if annexit
            then annexContent input oldkey
            else passThrough input

    annexContent input oldkey = do
        -- Git older than 2.5 would get a SIGPIPE and crash if all
        -- of stdin was not consumed here. Newer git catches the
        -- signal and stops sending, which is much faster. (Git also
        -- seems to forget to free memory when sending the file, so
        -- the less it is allowed to send, the less memory it wastes.)
        if Git.BuildVersion.older "2.5"
            then B.length input `seq` return ()
            else liftIO (hClose stdin)
        locked <- lockDown lockdowncfg file
        -- Restaging associated files is not possible, because
        -- git add runs this filter and has the index locked.
        ingested <- ingest' (previousBackend oldkey) locked Nothing (Restage False)
        key <- recordIngest ingested
        liftIO (emitPointer key)

    -- The backend that was used for this file before, so that the
    -- backend does not change when git re-cleans the file.
    previousBackend oldkey = oldkey >>= maybeLookupBackendVariety . keyVariety

    recordIngest result = case result of
        (Just key, _) -> do
            logStatus key InfoPresent
            return key
        _ -> error "could not add file to the annex"

    lockdowncfg = LockDownConfig
        { lockingFile = False
        , hardlinkFileTmp = False
        }
-- Decides whether a file's content should be annexed.
--
-- New files are annexed as configured by annex.largefiles, and are
-- annexed by default.
--
-- When annex.largefiles is not configured for a file, and a file with
-- the same name is already in the index, its existing annexed/not
-- annexed state is preserved. This avoids accidental conversions when
-- annex.largefiles is being set/unset on the fly, rather than being set
-- in gitattributes or .git/config.
shouldAnnex :: FilePath -> Maybe Key -> Annex Bool
shouldAnnex file moldkey = largeFilesMatcher >>= \matcher ->
    checkFileMatcher' matcher file unconfigured
  where
    -- A file that had a key before stays annexed; otherwise the
    -- result is True only when no object metadata is found for the
    -- file's ref.
    unconfigured = maybe nothingRecorded (const (return True)) moldkey
    nothingRecorded = isNothing <$> catObjectMetaData (Git.Ref.fileRef file)
-- Writes the pointer for a key, as formatted by formatPointer, to stdout.
emitPointer :: Key -> IO ()
emitPointer key = putStr (formatPointer key)
-- Recovers from an earlier race between eg git mv and git-annex get,
-- which can leave the file as a pointer file even though its content is
-- present in the annex. In that situation the pointer file is populated.
--
-- The same handling covers a copy of a pointer file being made, then
-- git-annex getting the content, and git add later being run on the
-- copy: the copy gets populated with the content.
--
-- Non-async exceptions thrown during the recovery are caught and
-- discarded.
getMoveRaceRecovery :: Key -> FilePath -> Annex ()
getMoveRaceRecovery k file = void (tryNonAsync recover)
  where
    recover = do
        pointerkey <- liftIO (isPointerFile file)
        when (Just k == pointerkey) $
            whenM (inAnnex k) populate

    populate = do
        obj <- calcRepo (gitAnnexLocation k)
        -- Restaging is not possible, because git add is running
        -- and has the index locked.
        minodecache <- populatePointerFile (Restage False) k obj file
        maybe (return ()) (\ic -> Database.Keys.addInodeCaches k [ic]) minodecache