2015-12-04 17:02:56 +00:00
|
|
|
{- git-annex command
|
|
|
|
-
|
2019-10-23 18:37:51 +00:00
|
|
|
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
|
2015-12-04 17:02:56 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2015-12-04 17:02:56 +00:00
|
|
|
-}
|
|
|
|
|
|
|
|
module Command.Smudge where
|
|
|
|
|
|
|
|
import Command
|
2015-12-04 19:30:06 +00:00
|
|
|
import Annex.Content
|
2015-12-09 18:25:33 +00:00
|
|
|
import Annex.Link
|
2015-12-04 19:30:06 +00:00
|
|
|
import Annex.FileMatcher
|
2015-12-24 17:15:26 +00:00
|
|
|
import Annex.Ingest
|
2016-06-09 19:17:08 +00:00
|
|
|
import Annex.CatFile
|
2018-10-25 18:43:13 +00:00
|
|
|
import Logs.Smudge
|
2015-12-04 19:30:06 +00:00
|
|
|
import Logs.Location
|
2015-12-09 21:00:37 +00:00
|
|
|
import qualified Database.Keys
|
2018-08-09 22:17:46 +00:00
|
|
|
import qualified Git.BuildVersion
|
2016-01-05 21:22:19 +00:00
|
|
|
import Git.FilePath
|
2018-11-15 17:04:40 +00:00
|
|
|
import qualified Git
|
2019-10-23 19:20:00 +00:00
|
|
|
import qualified Annex
|
2016-06-09 19:17:08 +00:00
|
|
|
import Backend
|
2019-06-25 15:37:52 +00:00
|
|
|
import Utility.Metered
|
2019-10-23 18:37:51 +00:00
|
|
|
import Annex.InodeSentinal
|
|
|
|
import Utility.InodeCache
|
2015-12-04 18:03:10 +00:00
|
|
|
|
2019-01-14 19:19:20 +00:00
|
|
|
import qualified Data.ByteString as S
|
|
|
|
import qualified Data.ByteString.Lazy as L
|
2015-12-04 17:02:56 +00:00
|
|
|
|
|
|
|
cmd :: Command
|
2015-12-04 19:30:06 +00:00
|
|
|
cmd = noCommit $ noMessages $
|
2015-12-04 17:02:56 +00:00
|
|
|
command "smudge" SectionPlumbing
|
|
|
|
"git smudge filter"
|
2015-12-04 19:30:06 +00:00
|
|
|
paramFile (seek <$$> optParser)
|
2015-12-04 17:02:56 +00:00
|
|
|
|
2018-10-25 18:43:13 +00:00
|
|
|
data SmudgeOptions = UpdateOption | SmudgeOptions
|
2015-12-04 19:30:06 +00:00
|
|
|
{ smudgeFile :: FilePath
|
|
|
|
, cleanOption :: Bool
|
|
|
|
}
|
2015-12-04 17:02:56 +00:00
|
|
|
|
2015-12-04 19:30:06 +00:00
|
|
|
optParser :: CmdParamsDesc -> Parser SmudgeOptions
|
2018-10-25 18:43:13 +00:00
|
|
|
optParser desc = smudgeoptions <|> updateoption
|
|
|
|
where
|
|
|
|
smudgeoptions = SmudgeOptions
|
|
|
|
<$> argument str ( metavar desc )
|
|
|
|
<*> switch ( long "clean" <> help "clean filter" )
|
|
|
|
updateoption = flag' UpdateOption
|
|
|
|
( long "update" <> help "populate annexed worktree files" )
|
2015-12-04 19:30:06 +00:00
|
|
|
|
|
|
|
seek :: SmudgeOptions -> CommandSeek
|
2018-10-25 18:43:13 +00:00
|
|
|
seek (SmudgeOptions f False) = commandAction (smudge f)
|
|
|
|
seek (SmudgeOptions f True) = commandAction (clean f)
|
|
|
|
seek UpdateOption = commandAction update
|
2015-12-04 19:30:06 +00:00
|
|
|
|
2015-12-04 21:18:26 +00:00
|
|
|
-- Smudge filter is fed git file content, and if it's a pointer to an
|
2018-10-25 18:43:13 +00:00
|
|
|
-- available annex object, git expects it to output its content.
|
|
|
|
--
|
|
|
|
-- However, this does not do that. It outputs the pointer, and records
|
|
|
|
-- the filename in the smudge log. Git hooks run after commands like checkout
|
|
|
|
-- then run git annex smudge --update which populates the work tree files
|
|
|
|
-- with annex content. This is done for several reasons:
|
|
|
|
--
|
|
|
|
-- * To support annex.thin
|
|
|
|
-- * Because git currently buffers the whole object received from the
|
|
|
|
-- smudge filter in memory, which is a problem with large files.
|
2015-12-04 19:30:06 +00:00
|
|
|
smudge :: FilePath -> CommandStart
|
2015-12-07 18:35:46 +00:00
|
|
|
smudge file = do
|
2019-01-14 19:19:20 +00:00
|
|
|
b <- liftIO $ L.hGetContents stdin
|
|
|
|
case parseLinkTargetOrPointerLazy b of
|
2018-10-25 18:43:13 +00:00
|
|
|
Nothing -> noop
|
2015-12-04 18:03:10 +00:00
|
|
|
Just k -> do
|
2018-10-25 18:43:13 +00:00
|
|
|
topfile <- inRepo (toTopFilePath file)
|
|
|
|
Database.Keys.addAssociatedFile k topfile
|
2018-10-25 19:40:12 +00:00
|
|
|
void $ smudgeLog k topfile
|
2019-01-14 19:19:20 +00:00
|
|
|
liftIO $ L.putStr b
|
2015-12-04 18:03:10 +00:00
|
|
|
stop
|
2015-12-04 19:30:06 +00:00
|
|
|
|
2015-12-09 19:24:32 +00:00
|
|
|
-- Clean filter is fed file content on stdin, decides if a file
|
|
|
|
-- should be stored in the annex, and outputs a pointer to its
|
2018-08-09 22:17:46 +00:00
|
|
|
-- injested content if so. Otherwise, the original content.
|
2015-12-04 19:30:06 +00:00
|
|
|
clean :: FilePath -> CommandStart
|
|
|
|
clean file = do
|
2019-01-14 19:19:20 +00:00
|
|
|
b <- liftIO $ L.hGetContents stdin
|
2018-11-15 17:04:40 +00:00
|
|
|
ifM fileoutsiderepo
|
2019-01-14 19:19:20 +00:00
|
|
|
( liftIO $ L.hPut stdout b
|
|
|
|
, case parseLinkTargetOrPointerLazy b of
|
2018-11-15 17:04:40 +00:00
|
|
|
Just k -> do
|
|
|
|
getMoveRaceRecovery k file
|
2019-01-14 19:19:20 +00:00
|
|
|
liftIO $ L.hPut stdout b
|
2018-11-15 17:04:40 +00:00
|
|
|
Nothing -> go b =<< catKeyFile file
|
|
|
|
)
|
2015-12-04 19:30:06 +00:00
|
|
|
stop
|
2016-01-01 18:16:40 +00:00
|
|
|
where
|
2018-08-27 18:47:17 +00:00
|
|
|
go b oldkey = ifM (shouldAnnex file oldkey)
|
|
|
|
( do
|
|
|
|
-- Before git 2.5, failing to consume all stdin here
|
|
|
|
-- would cause a SIGPIPE and crash it.
|
|
|
|
-- Newer git catches the signal and stops sending,
|
|
|
|
-- which is much faster. (Also, git seems to forget
|
|
|
|
-- to free memory when sending the file, so the
|
|
|
|
-- less we let it send, the less memory it will waste.)
|
|
|
|
if Git.BuildVersion.older "2.5"
|
2019-01-14 19:19:20 +00:00
|
|
|
then L.length b `seq` return ()
|
2018-08-27 18:47:17 +00:00
|
|
|
else liftIO $ hClose stdin
|
2018-10-25 20:38:04 +00:00
|
|
|
|
2018-10-30 04:40:17 +00:00
|
|
|
-- Optimization for the case when the file is already
|
|
|
|
-- annexed and is unmodified.
|
2018-10-25 20:38:04 +00:00
|
|
|
case oldkey of
|
2018-10-25 21:23:53 +00:00
|
|
|
Nothing -> doingest oldkey
|
2018-10-25 20:38:04 +00:00
|
|
|
Just ko -> ifM (isUnmodifiedCheap ko file)
|
|
|
|
( liftIO $ emitPointer ko
|
2018-10-25 21:23:53 +00:00
|
|
|
, doingest oldkey
|
2018-10-25 20:38:04 +00:00
|
|
|
)
|
2019-01-14 19:19:20 +00:00
|
|
|
, liftIO $ L.hPut stdout b
|
2018-08-27 18:47:17 +00:00
|
|
|
)
|
2018-10-25 20:38:04 +00:00
|
|
|
|
2018-10-25 21:23:53 +00:00
|
|
|
doingest oldkey = do
|
2018-10-25 20:38:04 +00:00
|
|
|
-- Look up the backend that was used for this file
|
|
|
|
-- before, so that when git re-cleans a file its
|
|
|
|
-- backend does not change.
|
2019-11-22 20:24:04 +00:00
|
|
|
let oldbackend = maybe Nothing (maybeLookupBackendVariety . fromKey keyVariety) oldkey
|
2018-10-25 20:38:04 +00:00
|
|
|
-- Can't restage associated files because git add
|
|
|
|
-- runs this and has the index locked.
|
|
|
|
let norestage = Restage False
|
|
|
|
liftIO . emitPointer
|
|
|
|
=<< postingest
|
2019-06-25 15:37:52 +00:00
|
|
|
=<< (\ld -> ingest' oldbackend nullMeterUpdate ld Nothing norestage)
|
2018-10-25 20:38:04 +00:00
|
|
|
=<< lockDown cfg file
|
2018-08-27 18:47:17 +00:00
|
|
|
|
|
|
|
postingest (Just k, _) = do
|
2016-01-01 18:16:40 +00:00
|
|
|
logStatus k InfoPresent
|
|
|
|
return k
|
2018-08-27 18:47:17 +00:00
|
|
|
postingest _ = error "could not add file to the annex"
|
|
|
|
|
2016-01-07 21:39:59 +00:00
|
|
|
cfg = LockDownConfig
|
|
|
|
{ lockingFile = False
|
2019-05-07 17:04:39 +00:00
|
|
|
, hardlinkFileTmpDir = Nothing
|
2016-01-07 21:39:59 +00:00
|
|
|
}
|
2015-12-04 19:30:06 +00:00
|
|
|
|
2018-11-15 17:04:40 +00:00
|
|
|
-- git diff can run the clean filter on files outside the
|
|
|
|
-- repository; can't annex those
|
|
|
|
fileoutsiderepo = do
|
|
|
|
repopath <- liftIO . absPath =<< fromRepo Git.repoPath
|
|
|
|
filepath <- liftIO $ absPath file
|
|
|
|
return $ not $ dirContains repopath filepath
|
|
|
|
|
2019-10-24 18:10:48 +00:00
|
|
|
-- If annex.largefiles is configured, matching files are added to the
|
|
|
|
-- annex. But annex.gitaddtoannex can be set to false to disable that.
|
2019-10-23 18:37:51 +00:00
|
|
|
--
|
2019-10-24 18:10:48 +00:00
|
|
|
-- When annex.largefiles is not configured, files are normally not
|
|
|
|
-- added to the annex, so will be added to git. But some heuristics
|
|
|
|
-- are used to avoid bad behavior:
|
2019-10-23 18:37:51 +00:00
|
|
|
--
|
2019-10-24 18:10:48 +00:00
|
|
|
-- If the index already contains the file, preserve its annexed/not annexed
|
|
|
|
-- state. This prevents accidental conversions.
|
|
|
|
--
|
|
|
|
-- Otherwise, when the file's inode is the same as one that was used for
|
|
|
|
-- annexed content before, annex it. This handles cases such as renaming an
|
2019-10-23 18:37:51 +00:00
|
|
|
-- unlocked annexed file followed by git add, which the user naturally
|
|
|
|
-- expects to behave the same as git mv.
|
2018-08-27 18:47:17 +00:00
|
|
|
shouldAnnex :: FilePath -> Maybe Key -> Annex Bool
|
2019-10-23 19:20:00 +00:00
|
|
|
shouldAnnex file moldkey = ifM (annexGitAddToAnnex <$> Annex.getGitConfig)
|
2019-10-24 18:10:48 +00:00
|
|
|
( checkmatcher checkheuristics
|
|
|
|
, checkheuristics
|
2019-10-23 19:20:00 +00:00
|
|
|
)
|
2018-08-27 18:47:17 +00:00
|
|
|
where
|
2019-10-24 15:46:40 +00:00
|
|
|
checkmatcher d = do
|
2019-10-23 19:20:00 +00:00
|
|
|
matcher <- largeFilesMatcher
|
2019-10-24 15:46:40 +00:00
|
|
|
checkFileMatcher' matcher file d
|
2019-10-23 19:20:00 +00:00
|
|
|
|
2019-10-24 18:10:48 +00:00
|
|
|
checkheuristics = case moldkey of
|
2018-08-27 18:47:17 +00:00
|
|
|
Just _ -> return True
|
2019-10-24 18:10:48 +00:00
|
|
|
Nothing -> checkknowninode
|
|
|
|
|
|
|
|
checkknowninode = withTSDelta (liftIO . genInodeCache file) >>= \case
|
|
|
|
Nothing -> pure False
|
|
|
|
Just ic -> Database.Keys.isInodeKnown ic =<< sentinalStatus
|
2015-12-04 19:30:06 +00:00
|
|
|
|
|
|
|
emitPointer :: Key -> IO ()
|
2019-01-14 19:19:20 +00:00
|
|
|
emitPointer = S.putStr . formatPointer
|
2018-08-22 20:01:50 +00:00
|
|
|
|
|
|
|
-- Recover from a previous race between eg git mv and git-annex get.
|
|
|
|
-- That could result in the file remaining a pointer file, while
|
|
|
|
-- its content is present in the annex. Populate the pointer file.
|
|
|
|
--
|
|
|
|
-- This also handles the case where a copy of a pointer file is made,
|
|
|
|
-- then git-annex gets the content, and later git add is run on
|
|
|
|
-- the pointer copy. It will then be populated with the content.
|
|
|
|
getMoveRaceRecovery :: Key -> FilePath -> Annex ()
|
|
|
|
getMoveRaceRecovery k file = void $ tryNonAsync $
|
2018-10-25 18:31:45 +00:00
|
|
|
whenM (inAnnex k) $ do
|
|
|
|
obj <- calcRepo (gitAnnexLocation k)
|
|
|
|
-- Cannot restage because git add is running and has
|
|
|
|
-- the index locked.
|
|
|
|
populatePointerFile (Restage False) k obj file >>= \case
|
|
|
|
Nothing -> return ()
|
|
|
|
Just ic -> Database.Keys.addInodeCaches k [ic]
|
2018-10-25 18:43:13 +00:00
|
|
|
|
|
|
|
update :: CommandStart
|
|
|
|
update = do
|
|
|
|
updateSmudged (Restage True)
|
|
|
|
stop
|
|
|
|
|
|
|
|
updateSmudged :: Restage -> Annex ()
|
|
|
|
updateSmudged restage = streamSmudged $ \k topf -> do
|
|
|
|
f <- fromRepo $ fromTopFilePath topf
|
|
|
|
whenM (inAnnex k) $ do
|
|
|
|
obj <- calcRepo (gitAnnexLocation k)
|
|
|
|
unlessM (isJust <$> populatePointerFile restage k obj f) $
|
|
|
|
liftIO (isPointerFile f) >>= \case
|
|
|
|
Just k' | k' == k -> toplevelWarning False $
|
|
|
|
"unable to populate worktree file " ++ f
|
|
|
|
_ -> noop
|