git-annex/Command/Add.hs
Joey Hess 18e00500ce
bwlimit
Added annex.bwlimit and remote.name.annex-bwlimit config that works for git
remotes and many but not all special remotes.

This nearly works, at least for a git remote on the same disk. With it set
to 100kb/1s, the meter displays an actual bandwidth of 128 kb/s, with
occasional spikes to 160 kb/s. So it needs to delay just a bit longer...
I'm unsure why.

However, at the beginning a lot of data flows before it determines the
right bandwidth limit. A granularity of less than 1s would probably improve
that.

And, I don't know yet if it makes sense to have it be 100ks/1s rather than
100kb/s. Is there a situation where the user would want a larger
granularity? Does granulatity need to be configurable at all? I only used that
format for the config really in order to reuse an existing parser.

This can't support for external special remotes, or for ones that
themselves shell out to an external command. (Well, it could, but it
would involve pausing and resuming the child process tree, which seems
very hard to implement and very strange besides.) There could also be some
built-in special remotes that it still doesn't work for, due to them not
having a progress meter whose displays blocks the bandwidth using thread.
But I don't think there are actually any that run a separate thread for
downloads than the thread that displays the progress meter.

Sponsored-by: Graham Spencer on Patreon
2021-09-21 16:58:10 -04:00

207 lines
6.4 KiB
Haskell

{- git-annex command
-
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Command.Add where
import Command
import Annex.Ingest
import Logs.Location
import Annex.Content
import qualified Annex
import qualified Annex.Queue
import qualified Database.Keys
import Annex.FileMatcher
import Annex.Link
import Annex.Tmp
import Messages.Progress
import Git.FilePath
import Config.GitConfig
import Config.Smudge
import Utility.OptParse
import Utility.InodeCache
import Annex.InodeSentinal
import qualified Utility.RawFilePath as R
cmd :: Command
cmd = notBareRepo $
withGlobalOptions opts $
command "add" SectionCommon "add files to annex"
paramPaths (seek <$$> optParser)
where
opts =
[ jobsOption
, jsonOptions
, jsonProgressOption
, fileMatchingOptions LimitDiskFiles
]
data AddOptions = AddOptions
{ addThese :: CmdParams
, batchOption :: BatchMode
, updateOnly :: Bool
, largeFilesOverride :: Maybe Bool
, checkGitIgnoreOption :: CheckGitIgnore
}
optParser :: CmdParamsDesc -> Parser AddOptions
optParser desc = AddOptions
<$> cmdParams desc
<*> parseBatchOption False
<*> switch
( long "update"
<> short 'u'
<> help "only update tracked files"
)
<*> (parseforcelarge <|> parseforcesmall)
<*> checkGitIgnoreSwitch
where
parseforcelarge = flag Nothing (Just True)
( long "force-large"
<> help "add all files to annex, ignoring other configuration"
)
parseforcesmall = flag Nothing (Just False)
( long "force-small"
<> help "add all files to git, ignoring other configuration"
)
checkGitIgnoreSwitch :: Parser CheckGitIgnore
checkGitIgnoreSwitch = CheckGitIgnore <$>
invertableSwitch "check-gitignore" True
(help "Do not check .gitignore when adding files")
seek :: AddOptions -> CommandSeek
seek o = startConcurrency commandStages $ do
largematcher <- largeFilesMatcher
addunlockedmatcher <- addUnlockedMatcher
annexdotfiles <- getGitConfigVal annexDotFiles
let gofile (si, file) = case largeFilesOverride o of
Nothing ->
ifM (pure (annexdotfiles || not (dotfile file))
<&&> (checkFileMatcher largematcher file
<||> Annex.getState Annex.force))
( start o si file addunlockedmatcher
, ifM (annexAddSmallFiles <$> Annex.getGitConfig)
( startSmall o si file
, stop
)
)
Just True -> start o si file addunlockedmatcher
Just False -> startSmallOverridden o si file
case batchOption o of
Batch fmt
| updateOnly o ->
giveup "--update --batch is not supported"
| otherwise -> batchFiles fmt gofile
NoBatch -> do
-- Avoid git ls-files complaining about files that
-- are not known to git yet, since this will add
-- them. Instead, have workTreeItems warn about other
-- problems, like files that don't exist.
let ww = WarnUnmatchWorkTreeItems
l <- workTreeItems ww (addThese o)
let go a = a ww (commandAction . gofile) l
unless (updateOnly o) $
go (withFilesNotInGit (checkGitIgnoreOption o))
go withFilesMaybeModified
go withUnmodifiedUnlockedPointers
{- Pass file off to git-add. -}
startSmall :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
startSmall o si file =
starting "add" (ActionItemTreeFile file) si $
next $ addSmall (checkGitIgnoreOption o) file
addSmall :: CheckGitIgnore -> RawFilePath -> Annex Bool
addSmall ci file = do
showNote "non-large file; adding content to git repository"
addFile Small ci file
startSmallOverridden :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
startSmallOverridden o si file =
starting "add" (ActionItemTreeFile file) si $ next $ do
showNote "adding content to git repository"
addFile Small (checkGitIgnoreOption o) file
data SmallOrLarge = Small | Large
addFile :: SmallOrLarge -> CheckGitIgnore -> RawFilePath -> Annex Bool
addFile smallorlarge ci file = do
ps <- gitAddParams ci
cps <- case smallorlarge of
-- In case the file is being converted from an annexed file
-- to be stored in git, remove the cached inode, so that
-- if the smudge clean filter later runs on the file,
-- it will not remember it was annexed.
--
-- The use of bypassSmudgeConfig prevents the smudge
-- filter from being run. So the changes to the database
-- can be queued up and not flushed to disk immediately.
Small -> do
maybe noop Database.Keys.removeInodeCache
=<< withTSDelta (liftIO . genInodeCache file)
return bypassSmudgeConfig
Large -> return []
Annex.Queue.addCommand cps "add" (ps++[Param "--"])
[fromRawFilePath file]
return True
start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
start o si file addunlockedmatcher = do
mk <- liftIO $ isPointerFile file
maybe go fixuppointer mk
where
go = ifAnnexed file addpresent add
add = liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
Nothing -> stop
Just s
| not (isRegularFile s) && not (isSymbolicLink s) -> stop
| otherwise ->
starting "add" (ActionItemTreeFile file) si $
if isSymbolicLink s
then next $ addFile Small (checkGitIgnoreOption o) file
else perform o file addunlockedmatcher
addpresent key =
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
Just s | isSymbolicLink s -> fixuplink key
_ -> add
fixuplink key =
starting "add" (ActionItemTreeFile file) si $
addingExistingLink file key $ do
liftIO $ removeFile (fromRawFilePath file)
addLink (checkGitIgnoreOption o) file key Nothing
next $ cleanup key =<< inAnnex key
fixuppointer key =
starting "add" (ActionItemTreeFile file) si $
addingExistingLink file key $ do
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
next $ addFile Large (checkGitIgnoreOption o) file
perform :: AddOptions -> RawFilePath -> AddUnlockedMatcher -> CommandPerform
perform o file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
lockingfile <- not <$> addUnlocked addunlockedmatcher
(MatchingFile (FileInfo file file Nothing))
True
let cfg = LockDownConfig
{ lockingFile = lockingfile
, hardlinkFileTmpDir = Just tmpdir
, checkWritePerms = True
}
ld <- lockDown cfg (fromRawFilePath file)
let sizer = keySource <$> ld
v <- metered Nothing sizer Nothing $ \_meter meterupdate ->
ingestAdd (checkGitIgnoreOption o) meterupdate ld
finish v
where
finish (Just key) = next $ cleanup key True
finish Nothing = stop
cleanup :: Key -> Bool -> CommandCleanup
cleanup key hascontent = do
maybeShowJSON $ JSONChunk [("key", serializeKey key)]
when hascontent $
logStatus key InfoPresent
return True