git-annex/Command/Add.hs

277 lines
8.5 KiB
Haskell
Raw Normal View History

{- git-annex command
-
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Command.Add where
import Command
2015-12-22 17:23:33 +00:00
import Annex.Ingest
2011-10-15 20:21:08 +00:00
import Logs.Location
2011-10-04 04:40:47 +00:00
import Annex.Content
fully support core.symlinks=false in all relevant symlink handling code Refactored annex link code into nice clean new library. Audited and dealt with calls to createSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ createSymbolicLink linktarget file only when core.symlinks=true Assistant/WebApp/Configurators/Local.hs: createSymbolicLink link link test if symlinks can be made Command/Fix.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/FromKey.hs: liftIO $ createSymbolicLink link file command only works in indirect mode Command/Indirect.hs: liftIO $ createSymbolicLink l f refuses to run if core.symlinks=false Init.hs: createSymbolicLink f f2 test if symlinks can be made Remote/Directory.hs: go [file] = catchBoolIO $ createSymbolicLink file f >> return True fast key linking; catches failure to make symlink and falls back to copy Remote/Git.hs: liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True ditto Upgrade/V1.hs: liftIO $ createSymbolicLink link f v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to readSymbolicLink. Remaining calls are all safe, because: Annex/Link.hs: ( liftIO $ catchMaybeIO $ readSymbolicLink file only when core.symlinks=true Assistant/Threads/Watcher.hs: ifM ((==) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) code that fixes real symlinks when inotify sees them It's ok to not fix psdueo-symlinks. Assistant/Threads/Watcher.hs: mlink <- liftIO (catchMaybeIO $ readSymbolicLink file) ditto Command/Fix.hs: stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do command only works in indirect mode Upgrade/V1.hs: getsymlink = takeFileName <$> readSymbolicLink file v1 repos could not be on a filesystem w/o symlinks Audited and dealt with calls to isSymbolicLink. (Typically used with getSymbolicLinkStatus, but that is just used because getFileStatus is not as robust; it also works on pseudolinks.) Remaining calls are all safe, because: Assistant/Threads/SanityChecker.hs: | isSymbolicLink s -> addsymlink file ms only handles staging of symlinks that were somehow not staged (might need to be updated to support pseudolinks, but this is only a belt-and-suspenders check anyway, and I've never seen the code run) Command/Add.hs: if isSymbolicLink s || not (isRegularFile s) avoids adding symlinks to the annex, so not relevant Command/Indirect.hs: | isSymbolicLink s -> void $ flip whenAnnexed f $ only allowed on systems that support symlinks Command/Indirect.hs: whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do ditto Seek.hs:notSymlink f = liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f used to find unlocked files, only relevant in indirect mode Utility/FSEvents.hs: | Files.isSymbolicLink s = runhook addSymlinkHook $ Just s Utility/FSEvents.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: | Files.isSymbolicLink s -> Utility/INotify.hs: checkfiletype Files.isSymbolicLink addSymlinkHook f Utility/Kqueue.hs: | Files.isSymbolicLink s = callhook addSymlinkHook (Just s) change all above are lower-level, not relevant Audited and dealt with calls to isSymLink. Remaining calls are all safe, because: Annex/Direct.hs: | isSymLink (getmode item) = This is looking at git diff-tree objects, not files on disk Command/Unused.hs: | isSymLink (LsTree.mode l) = do This is looking at git ls-tree, not file on disk Utility/FileMode.hs:isSymLink :: FileMode -> Bool Utility/FileMode.hs:isSymLink = checkMode symbolicLinkMode low-level Done!!
2013-02-17 19:05:55 +00:00
import qualified Annex
import qualified Annex.Queue
import qualified Database.Keys
import Annex.FileMatcher
import Annex.Link
import Annex.Tmp
import Annex.HashObject
import Annex.WorkTree
import Messages.Progress
import Git.FilePath
import Git.Types
import Git.UpdateIndex
import Config.GitConfig
import Utility.OptParse
import Utility.InodeCache
import Annex.InodeSentinal
import Annex.CheckIgnore
import qualified Utility.RawFilePath as R
import qualified System.FilePath.ByteString as P
import System.PosixCompat.Files (fileSize, isSymbolicLink, isRegularFile, modificationTime, fileID, deviceID, fileMode, ownerExecuteMode, intersectFileModes)
cmd :: Command
cmd = notBareRepo $
withAnnexOptions opts $
command "add" SectionCommon "add files to annex"
paramPaths (seek <$$> optParser)
where
opts =
[ backendOption
, jobsOption
, jsonOptions
, jsonProgressOption
, fileMatchingOptions LimitDiskFiles
]
data AddOptions = AddOptions
{ addThese :: CmdParams
2016-01-19 21:46:46 +00:00
, batchOption :: BatchMode
, updateOnly :: Bool
, largeFilesOverride :: Maybe Bool
, checkGitIgnoreOption :: CheckGitIgnore
, dryRunOption :: DryRun
}
optParser :: CmdParamsDesc -> Parser AddOptions
optParser desc = AddOptions
<$> cmdParams desc
<*> parseBatchOption False
<*> switch
( long "update"
<> short 'u'
<> help "only update tracked files"
)
<*> (parseforcelarge <|> parseforcesmall)
<*> checkGitIgnoreSwitch
<*> parseDryRunOption
where
parseforcelarge = flag Nothing (Just True)
( long "force-large"
<> help "add all files to annex, ignoring other configuration"
)
parseforcesmall = flag Nothing (Just False)
( long "force-small"
<> help "add all files to git, ignoring other configuration"
)
checkGitIgnoreSwitch :: Parser CheckGitIgnore
checkGitIgnoreSwitch = CheckGitIgnore <$>
invertableSwitch "check-gitignore" True
(help "Do not check .gitignore when adding files")
seek :: AddOptions -> CommandSeek
seek o = startConcurrency commandStages (seek' o)
seek' :: AddOptions -> CommandSeek
seek' o = do
largematcher <- largeFilesMatcher
addunlockedmatcher <- addUnlockedMatcher
annexdotfiles <- getGitConfigVal annexDotFiles
let gofile includingsmall (si, file) = case largeFilesOverride o of
Nothing -> do
isdotfile <- if annexdotfiles
then pure False
else dotfile . getTopFilePath
<$> inRepo (toTopFilePath file)
islarge <- checkFileMatcher NoLiveUpdate largematcher file
<||> Annex.getRead Annex.force
if (not isdotfile && islarge)
then start dr si file addunlockedmatcher
else if includingsmall
then ifM (annexAddSmallFiles <$> Annex.getGitConfig)
( startSmall isdotfile dr si file
, stop
)
else stop
Just True -> start dr si file addunlockedmatcher
Just False -> startSmallOverridden dr si file
2016-01-19 21:46:46 +00:00
case batchOption o of
Batch fmt
| updateOnly o ->
giveup "--update --batch is not supported"
| otherwise -> batchOnly Nothing (addThese o) $
batchFiles fmt $ \v@(_si, file) ->
ifM (checkIgnored (checkGitIgnoreOption o) file)
( stop
, gofile True v
)
2016-01-19 21:46:46 +00:00
NoBatch -> do
-- Avoid git ls-files complaining about files that
-- are not known to git yet, since this will add
-- them. Instead, have workTreeItems warn about other
-- problems, like files that don't exist.
let ww = WarnUnmatchWorkTreeItems "add"
l <- workTreeItems ww (addThese o)
let go b a = a ww (commandAction . gofile b) l
unless (updateOnly o) $
go True (withFilesNotInGit (checkGitIgnoreOption o))
go True withFilesMaybeModified
-- Convert newly unlocked files back to locked files,
-- same as a modified unlocked file would get
-- locked when added.
go False withUnmodifiedUnlockedPointers
where
dr = dryRunOption o
2010-11-11 22:54:52 +00:00
{- Pass file off to git-add. -}
startSmall :: Bool -> DryRun -> SeekInput -> RawFilePath -> CommandStart
startSmall isdotfile dr si file =
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
Just s ->
starting "add" (ActionItemTreeFile file) si $
addSmall isdotfile dr file s
Nothing -> stop
addSmall :: Bool -> DryRun -> RawFilePath -> FileStatus -> CommandPerform
addSmall isdotfile dr file s = do
showNote $ (if isdotfile then "dotfile" else "non-large file")
<> "; adding content to git repository"
skipWhenDryRun dr $ next $ addFile Small file s
startSmallOverridden :: DryRun -> SeekInput -> RawFilePath -> CommandStart
startSmallOverridden dr si file =
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
Just s -> starting "add" (ActionItemTreeFile file) si $ do
showNote "adding content to git repository"
skipWhenDryRun dr $ next $ addFile Small file s
Nothing -> stop
data SmallOrLarge = Small | Large
addFile :: SmallOrLarge -> RawFilePath -> FileStatus -> Annex Bool
addFile smallorlarge file s = do
sha <- if isSymbolicLink s
then hashBlob =<< liftIO (R.readSymbolicLink file)
else if isRegularFile s
then hashFile file
else do
qp <- coreQuotePath <$> Annex.getGitConfig
giveup $ decodeBS $ quote qp $
file <> " is not a regular file"
let treetype = if isSymbolicLink s
then TreeSymlink
else if intersectFileModes ownerExecuteMode (fileMode s) /= 0
then TreeExecutable
else TreeFile
s' <- liftIO $ catchMaybeIO $ R.getSymbolicLinkStatus file
if maybe True (changed s) s'
then do
warning $ QuotedPath file <> " changed while it was being added"
return False
else do
case smallorlarge of
-- In case the file is being converted from
-- an annexed file to be stored in git,
-- remove the cached inode, so that if the
-- smudge clean filter later runs on the file,
-- it will not remember it was annexed.
Small -> maybe noop Database.Keys.removeInodeCache
=<< withTSDelta (liftIO . genInodeCache file)
Large -> noop
Annex.Queue.addUpdateIndex =<<
2023-02-27 19:02:53 +00:00
inRepo (stageFile sha treetype file)
return True
2012-11-12 05:05:04 +00:00
where
changed a b =
deviceID a /= deviceID b ||
fileID a /= fileID b ||
fileSize a /= fileSize b ||
modificationTime a /= modificationTime b ||
isRegularFile a /= isRegularFile b ||
isSymbolicLink a /= isSymbolicLink b
start :: DryRun -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
start dr si file addunlockedmatcher =
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
2017-12-05 19:00:50 +00:00
Nothing -> stop
Just s
2017-12-05 19:00:50 +00:00
| not (isRegularFile s) && not (isSymbolicLink s) -> stop
| otherwise -> do
mk <- liftIO $ isPointerFile file
maybe (go s) (fixuppointer s) mk
where
go s = lookupKey file >>= \case
Just k -> addpresent s k
Nothing -> add s
add s = starting "add" (ActionItemTreeFile file) si $
skipWhenDryRun dr $
if isSymbolicLink s
then next $ addFile Small file s
else perform file addunlockedmatcher
addpresent s key
| isSymbolicLink s = fixuplink key
| otherwise = add s
fixuplink key =
starting "add" (ActionItemTreeFile file) si $
addingExistingLink file key $
skipWhenDryRun dr $ withOtherTmp $ \tmp -> do
let tmpf = tmp P.</> P.takeFileName file
liftIO $ moveFile file tmpf
ifM (isSymbolicLink <$> liftIO (R.getSymbolicLinkStatus tmpf))
( do
liftIO $ R.removeLink tmpf
addSymlink file key Nothing
next $ cleanup key =<< inAnnex key
, do
liftIO $ moveFile tmpf file
next $ return True
)
fixuppointer s key =
starting "add" (ActionItemTreeFile file) si $
addingExistingLink file key $
skipWhenDryRun dr $ do
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
next $ addFile Large file s
perform :: RawFilePath -> AddUnlockedMatcher -> CommandPerform
perform file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
lockingfile <- not <$> addUnlocked addunlockedmatcher
2021-03-01 20:34:40 +00:00
(MatchingFile (FileInfo file file Nothing))
True
let cfg = LockDownConfig
{ lockingFile = lockingfile
, hardlinkFileTmpDir = Just tmpdir
, checkWritePerms = True
}
ld <- lockDown cfg (fromRawFilePath file)
let sizer = keySource <$> ld
v <- metered Nothing sizer Nothing $ \_meter meterupdate ->
ingestAdd meterupdate ld
finish v
where
finish (Just key) = next $ cleanup key True
finish Nothing = stop
cleanup :: Key -> Bool -> CommandCleanup
cleanup key hascontent = do
maybeShowJSON $ JSONChunk [("key", serializeKey key)]
when hascontent $
logStatus NoLiveUpdate key InfoPresent
return True