git-annex/Command/Smudge.hs
Joey Hess ce73a96e4e
use InodeCache when dropping a key to see if a pointer file can be safely reset
The Keys database can hold multiple inode caches for a given key. One for
the annex object, and one for each pointer file, which may not be hard
linked to it.

Inode caches for a key are recorded when its content is added to the annex,
but only if it has known pointer files. This is to avoid the overhead of
maintaining the database when not needed.

When the smudge filter outputs a file's content, the inode cache is not
updated, because git's smudge interface doesn't let us write the file. So,
dropping will fall back to doing an expensive verification then. Ideally,
git's interface would be improved, and then the inode cache could be
updated then too.
2015-12-09 17:54:54 -04:00

102 lines
2.6 KiB
Haskell

{- git-annex command
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Smudge where
import Common.Annex
import Command
import Annex.Content
import Annex.Link
import Annex.MetaData
import Annex.FileMatcher
import Types.KeySource
import Backend
import Logs.Location
import qualified Database.Keys
import qualified Data.ByteString.Lazy as B
cmd :: Command
cmd = noCommit $ noMessages $
command "smudge" SectionPlumbing
"git smudge filter"
paramFile (seek <$$> optParser)
data SmudgeOptions = SmudgeOptions
{ smudgeFile :: FilePath
, cleanOption :: Bool
}
optParser :: CmdParamsDesc -> Parser SmudgeOptions
optParser desc = SmudgeOptions
<$> argument str ( metavar desc )
<*> switch ( long "clean" <> help "clean filter" )
seek :: SmudgeOptions -> CommandSeek
seek o = commandAction $
(if cleanOption o then clean else smudge) (smudgeFile o)
-- Smudge filter is fed git file content, and if it's a pointer to an
-- available annex object, should output its content.
smudge :: FilePath -> CommandStart
smudge file = do
b <- liftIO $ B.hGetContents stdin
case parseLinkOrPointer b of
Nothing -> liftIO $ B.putStr b
Just k -> do
Database.Keys.addAssociatedFile k file
content <- calcRepo (gitAnnexLocation k)
liftIO $ B.hPut stdout . fromMaybe b
=<< catchMaybeIO (B.readFile content)
stop
-- Clean filter is fed file content on stdin, decides if a file
-- should be stored in the annex, and outputs a pointer to its
-- injested content.
clean :: FilePath -> CommandStart
clean file = do
b <- liftIO $ B.hGetContents stdin
if isJust (parseLinkOrPointer b)
then liftIO $ B.hPut stdout b
else ifM (shouldAnnex file)
( do
k <- ingest file
Database.Keys.addAssociatedFile k file
liftIO $ emitPointer k
, liftIO $ B.hPut stdout b
)
stop
shouldAnnex :: FilePath -> Annex Bool
shouldAnnex file = do
matcher <- largeFilesMatcher
checkFileMatcher matcher file
ingest :: FilePath -> Annex Key
ingest file = do
backend <- chooseBackend file
let source = KeySource
{ keyFilename = file
, contentLocation = file
, inodeCache = Nothing
}
k <- fst . fromMaybe (error "failed to generate a key")
<$> genKey source backend
-- Hard link (or copy) file content to annex
-- to prevent it from being lost when git checks out
-- a branch not containing this file.
r <- linkAnnex k file
case r of
LinkAnnexFailed -> error "Problem adding file to the annex"
LinkAnnexOk -> logStatus k InfoPresent
LinkAnnexNoop -> noop
genMetaData k file
=<< liftIO (getFileStatus file)
return k
emitPointer :: Key -> IO ()
emitPointer = putStrLn . formatPointer