git-annex/Command/Add.hs

207 lines
6.4 KiB
Haskell
Raw Normal View History

{- git-annex command
-
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Command.Add where
import Command
2015-12-22 17:23:33 +00:00
import Annex.Ingest
2011-10-15 20:21:08 +00:00
import Logs.Location
2011-10-04 04:40:47 +00:00
import Annex.Content
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
import qualified Annex
import qualified Annex.Queue
import qualified Database.Keys
import Annex.FileMatcher
import Annex.Link
import Annex.Tmp
import Messages.Progress
import Git.FilePath
import Config.GitConfig
import Config.Smudge
import Utility.OptParse
import Utility.InodeCache
import Annex.InodeSentinal
import qualified Utility.RawFilePath as R
cmd :: Command
cmd = notBareRepo $
withGlobalOptions opts $
command "add" SectionCommon "add files to annex"
paramPaths (seek <$$> optParser)
where
opts =
[ jobsOption
, jsonOptions
, jsonProgressOption
, fileMatchingOptions LimitDiskFiles
]
data AddOptions = AddOptions
{ addThese :: CmdParams
2016-01-19 21:46:46 +00:00
, batchOption :: BatchMode
, updateOnly :: Bool
, largeFilesOverride :: Maybe Bool
, checkGitIgnoreOption :: CheckGitIgnore
}
optParser :: CmdParamsDesc -> Parser AddOptions
optParser desc = AddOptions
<$> cmdParams desc
2016-01-19 21:46:46 +00:00
<*> parseBatchOption
<*> switch
( long "update"
<> short 'u'
<> help "only update tracked files"
)
<*> (parseforcelarge <|> parseforcesmall)
<*> checkGitIgnoreSwitch
where
parseforcelarge = flag Nothing (Just True)
( long "force-large"
<> help "add all files to annex, ignoring other configuration"
)
parseforcesmall = flag Nothing (Just False)
( long "force-small"
<> help "add all files to git, ignoring other configuration"
)
checkGitIgnoreSwitch :: Parser CheckGitIgnore
checkGitIgnoreSwitch = CheckGitIgnore <$>
invertableSwitch "check-gitignore" True
(help "Do not check .gitignore when adding files")
seek :: AddOptions -> CommandSeek
seek o = startConcurrency commandStages $ do
largematcher <- largeFilesMatcher
addunlockedmatcher <- addUnlockedMatcher
annexdotfiles <- getGitConfigVal annexDotFiles
let gofile (si, file) = case largeFilesOverride o of
2020-01-01 18:26:43 +00:00
Nothing ->
ifM (pure (annexdotfiles || not (dotfile file))
<&&> (checkFileMatcher largematcher file
<||> Annex.getState Annex.force))
( start o si file addunlockedmatcher
2020-01-01 18:26:43 +00:00
, ifM (annexAddSmallFiles <$> Annex.getGitConfig)
( startSmall o si file
2020-01-01 18:26:43 +00:00
, stop
)
)
Just True -> start o si file addunlockedmatcher
Just False -> startSmallOverridden o si file
2016-01-19 21:46:46 +00:00
case batchOption o of
Batch fmt
| updateOnly o ->
giveup "--update --batch is not supported"
| otherwise -> batchFilesMatching fmt gofile
2016-01-19 21:46:46 +00:00
NoBatch -> do
-- Avoid git ls-files complaining about files that
-- are not known to git yet, since this will add
-- them. Instead, have workTreeItems warn about other
-- problems, like files that don't exist.
let ww = WarnUnmatchWorkTreeItems
l <- workTreeItems ww (addThese o)
let go a = a ww (commandAction . gofile) l
unless (updateOnly o) $
go (withFilesNotInGit (checkGitIgnoreOption o))
go withFilesMaybeModified
go withUnmodifiedUnlockedPointers
2010-11-11 22:54:52 +00:00
{- Pass file off to git-add. -}
startSmall :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
startSmall o si file =
starting "add" (ActionItemTreeFile file) si $
next $ addSmall (checkGitIgnoreOption o) file
addSmall :: CheckGitIgnore -> RawFilePath -> Annex Bool
addSmall ci file = do
2015-04-08 20:16:42 +00:00
showNote "non-large file; adding content to git repository"
addFile Small ci file
startSmallOverridden :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
startSmallOverridden o si file =
starting "add" (ActionItemTreeFile file) si $ next $ do
showNote "adding content to git repository"
addFile Small (checkGitIgnoreOption o) file
data SmallOrLarge = Small | Large
addFile :: SmallOrLarge -> CheckGitIgnore -> RawFilePath -> Annex Bool
addFile smallorlarge ci file = do
ps <- gitAddParams ci
cps <- case smallorlarge of
-- In case the file is being converted from an annexed file
-- to be stored in git, remove the cached inode, so that
-- if the smudge clean filter later runs on the file,
-- it will not remember it was annexed.
--
-- The use of bypassSmudgeConfig prevents the smudge
-- filter from being run. So the changes to the database
-- can be queued up and not flushed to disk immediately.
Small -> do
maybe noop Database.Keys.removeInodeCache
=<< withTSDelta (liftIO . genInodeCache file)
return bypassSmudgeConfig
Large -> return []
Annex.Queue.addCommand cps "add" (ps++[Param "--"])
[fromRawFilePath file]
return True
start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
start o si file addunlockedmatcher = do
mk <- liftIO $ isPointerFile file
maybe go fixuppointer mk
2012-11-12 05:05:04 +00:00
where
go = ifAnnexed file addpresent add
add = liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
2017-12-05 19:00:50 +00:00
Nothing -> stop
Just s
| not (isRegularFile s) && not (isSymbolicLink s) -> stop
make CommandStart return a StartMessage The goal is to be able to run CommandStart in the main thread when -J is used, rather than unncessarily passing it off to a worker thread, which incurs overhead that is signficant when the CommandStart is going to quickly decide to stop. To do that, the message it displays needs to be displayed in the worker thread, after the CommandStart has run. Also, the change will mean that CommandStart will no longer necessarily run with the same Annex state as CommandPerform. While its docs already said it should avoid modifying Annex state, I audited all the CommandStart code as part of the conversion. (Note that CommandSeek already sometimes runs with a different Annex state, and that has not been a source of any problems, so I am not too worried that this change will lead to breakage going forward.) The only modification of Annex state I found was it calling allowMessages in some Commands that default to noMessages. Dealt with that by adding a startCustomOutput and a startingUsualMessages. This lets a command start with noMessages and then select the output it wants for each CommandStart. One bit of breakage: onlyActionOn has been removed from commands that used it. The plan is that, since a StartMessage contains an ActionItem, when a Key can be extracted from that, the parallel job runner can run onlyActionOn' automatically. Then commands won't need to worry about this detail. Future work. Otherwise, this was a fairly straightforward process of making each CommandStart compile again. Hopefully other behavior changes were mostly avoided. In a few cases, a command had a CommandStart that called a CommandPerform that then called showStart multiple times. I have collapsed those down to a single start action. The main command to perhaps suffer from it is Command.Direct, which used to show a start for each file, and no longer does. Another minor behavior change is that some commands used showStart before, but had an associated file and a Key available, so were changed to ShowStart with an ActionItemAssociatedFile. That will not change the normal output or behavior, but --json output will now include the key. This should not break it for anyone using a real json parser.
2019-06-06 19:42:30 +00:00
| otherwise ->
starting "add" (ActionItemTreeFile file) si $
make CommandStart return a StartMessage The goal is to be able to run CommandStart in the main thread when -J is used, rather than unncessarily passing it off to a worker thread, which incurs overhead that is signficant when the CommandStart is going to quickly decide to stop. To do that, the message it displays needs to be displayed in the worker thread, after the CommandStart has run. Also, the change will mean that CommandStart will no longer necessarily run with the same Annex state as CommandPerform. While its docs already said it should avoid modifying Annex state, I audited all the CommandStart code as part of the conversion. (Note that CommandSeek already sometimes runs with a different Annex state, and that has not been a source of any problems, so I am not too worried that this change will lead to breakage going forward.) The only modification of Annex state I found was it calling allowMessages in some Commands that default to noMessages. Dealt with that by adding a startCustomOutput and a startingUsualMessages. This lets a command start with noMessages and then select the output it wants for each CommandStart. One bit of breakage: onlyActionOn has been removed from commands that used it. The plan is that, since a StartMessage contains an ActionItem, when a Key can be extracted from that, the parallel job runner can run onlyActionOn' automatically. Then commands won't need to worry about this detail. Future work. Otherwise, this was a fairly straightforward process of making each CommandStart compile again. Hopefully other behavior changes were mostly avoided. In a few cases, a command had a CommandStart that called a CommandPerform that then called showStart multiple times. I have collapsed those down to a single start action. The main command to perhaps suffer from it is Command.Direct, which used to show a start for each file, and no longer does. Another minor behavior change is that some commands used showStart before, but had an associated file and a Key available, so were changed to ShowStart with an ActionItemAssociatedFile. That will not change the normal output or behavior, but --json output will now include the key. This should not break it for anyone using a real json parser.
2019-06-06 19:42:30 +00:00
if isSymbolicLink s
then next $ addFile Small (checkGitIgnoreOption o) file
else perform o file addunlockedmatcher
addpresent key =
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
2017-12-05 19:00:50 +00:00
Just s | isSymbolicLink s -> fixuplink key
_ -> add
fixuplink key =
starting "add" (ActionItemTreeFile file) si $
addingExistingLink file key $ do
liftIO $ removeFile (fromRawFilePath file)
addLink (checkGitIgnoreOption o) file key Nothing
next $ cleanup key =<< inAnnex key
fixuppointer key =
starting "add" (ActionItemTreeFile file) si $
addingExistingLink file key $ do
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
next $ addFile Large (checkGitIgnoreOption o) file
perform :: AddOptions -> RawFilePath -> AddUnlockedMatcher -> CommandPerform
perform o file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
lockingfile <- not <$> addUnlocked addunlockedmatcher
2021-03-01 20:34:40 +00:00
(MatchingFile (FileInfo file file Nothing))
True
let cfg = LockDownConfig
{ lockingFile = lockingfile
, hardlinkFileTmpDir = Just tmpdir
}
ld <- lockDown cfg (fromRawFilePath file)
let sizer = keySource <$> ld
v <- metered Nothing sizer $ \_meter meterupdate ->
ingestAdd (checkGitIgnoreOption o) meterupdate ld
finish v
where
finish (Just key) = next $ cleanup key True
finish Nothing = stop
cleanup :: Key -> Bool -> CommandCleanup
cleanup key hascontent = do
maybeShowJSON $ JSONChunk [("key", serializeKey key)]
when hascontent $
logStatus key InfoPresent
return True