5b332a87be
Dropping from a cluster drops from every node of the cluster. Including nodes that the cluster does not think have the content. This is different from GET and CHECKPRESENT, which do trust the cluster's location log. The difference is that removing from a cluster should make 100% the content is gone from every node. So doing extra work is ok. Compare with CHECKPRESENT where checking every node could make it very expensive, and the worst that can happen in a false negative is extra work being done. Extended the P2P protocol with FAILURE-PLUS to handle the case where a drop from one node succeeds, but a drop from another node fails. In that case the entire cluster drop has failed. Note that SUCCESS-PLUS is returned when dropping from a proxied remote that is not a cluster, when the protocol version supports it. This is because P2P.Proxy does not know when it's proxying for a single node cluster vs for a remote that is not a cluster.
403 lines
14 KiB
Haskell
403 lines
14 KiB
Haskell
{- git-annex command
|
|
-
|
|
- Copyright 2012-2024 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
-}
|
|
|
|
{-# LANGUAGE ApplicativeDo, OverloadedStrings #-}
|
|
|
|
module Command.Import where
|
|
|
|
import Command
|
|
import qualified Git
|
|
import qualified Annex
|
|
import qualified Command.Add
|
|
import qualified Command.Reinject
|
|
import qualified Types.Remote as Remote
|
|
import qualified Git.Ref
|
|
import Utility.CopyFile
|
|
import Utility.OptParse
|
|
import Backend
|
|
import Types.KeySource
|
|
import Annex.CheckIgnore
|
|
import Annex.NumCopies
|
|
import Annex.FileMatcher
|
|
import Annex.Ingest
|
|
import Annex.InodeSentinal
|
|
import Annex.Import
|
|
import Annex.Perms
|
|
import Annex.RemoteTrackingBranch
|
|
import Utility.InodeCache
|
|
import Logs.Location
|
|
import Git.FilePath
|
|
import Git.Types
|
|
import Types.Import
|
|
import Utility.Metered
|
|
import qualified Utility.RawFilePath as R
|
|
|
|
import Control.Concurrent.STM
|
|
import System.PosixCompat.Files (isDirectory, isSymbolicLink, isRegularFile)
|
|
|
|
cmd :: Command
|
|
cmd = notBareRepo $
|
|
withAnnexOptions opts $
|
|
command "import" SectionCommon
|
|
"add a tree of files to the repository"
|
|
(paramPaths ++ "|BRANCH")
|
|
(seek <$$> optParser)
|
|
where
|
|
opts =
|
|
[ backendOption
|
|
, jobsOption
|
|
, jsonOptions
|
|
, jsonProgressOption
|
|
-- These options are only used when importing from a
|
|
-- directory, not from a special remote. So it's ok
|
|
-- to use LimitDiskFiles.
|
|
, fileMatchingOptions LimitDiskFiles
|
|
]
|
|
|
|
data ImportOptions
|
|
= LocalImportOptions
|
|
{ importFiles :: CmdParams
|
|
, duplicateMode :: DuplicateMode
|
|
, checkGitIgnoreOption :: CheckGitIgnore
|
|
}
|
|
| RemoteImportOptions
|
|
{ importFromRemote :: DeferredParse Remote
|
|
, importToBranch :: Branch
|
|
, importToSubDir :: Maybe FilePath
|
|
, importContent :: Bool
|
|
, checkGitIgnoreOption :: CheckGitIgnore
|
|
, messageOption :: [String]
|
|
}
|
|
|
|
optParser :: CmdParamsDesc -> Parser ImportOptions
|
|
optParser desc = do
|
|
ps <- cmdParams desc
|
|
mfromremote <- optional $ mkParseRemoteOption <$> parseFromOption
|
|
content <- invertableSwitch "content" True
|
|
( help "do not get contents of imported files"
|
|
)
|
|
dupmode <- fromMaybe Default <$> optional duplicateModeParser
|
|
ic <- Command.Add.checkGitIgnoreSwitch
|
|
message <- many (strOption
|
|
( long "message" <> short 'm' <> metavar "MSG"
|
|
<> help "commit message"
|
|
))
|
|
pure $ case mfromremote of
|
|
Nothing -> LocalImportOptions ps dupmode ic
|
|
Just r -> case ps of
|
|
[bs] ->
|
|
let (branch, subdir) = separate (== ':') bs
|
|
in RemoteImportOptions r
|
|
(Ref (encodeBS branch))
|
|
(if null subdir then Nothing else Just subdir)
|
|
content
|
|
ic
|
|
message
|
|
_ -> giveup "expected BRANCH[:SUBDIR]"
|
|
|
|
data DuplicateMode = Default | Duplicate | DeDuplicate | CleanDuplicates | SkipDuplicates | ReinjectDuplicates
|
|
deriving (Eq)
|
|
|
|
duplicateModeParser :: Parser DuplicateMode
|
|
duplicateModeParser =
|
|
flag' Duplicate
|
|
( long "duplicate"
|
|
<> help "do not delete source files"
|
|
)
|
|
<|> flag' DeDuplicate
|
|
( long "deduplicate"
|
|
<> help "delete source files whose content was imported before"
|
|
)
|
|
<|> flag' CleanDuplicates
|
|
( long "clean-duplicates"
|
|
<> help "delete duplicate source files (import nothing)"
|
|
)
|
|
<|> flag' SkipDuplicates
|
|
( long "skip-duplicates"
|
|
<> help "import only new files (do not delete source files)"
|
|
)
|
|
<|> flag' ReinjectDuplicates
|
|
( long "reinject-duplicates"
|
|
<> help "import new files, and reinject the content of files that were imported before"
|
|
)
|
|
|
|
seek :: ImportOptions -> CommandSeek
|
|
seek o@(LocalImportOptions {}) = startConcurrency commandStages $ do
|
|
repopath <- liftIO . absPath =<< fromRepo Git.repoPath
|
|
inrepops <- liftIO $ filter (dirContains repopath)
|
|
<$> mapM (absPath . toRawFilePath) (importFiles o)
|
|
unless (null inrepops) $ do
|
|
qp <- coreQuotePath <$> Annex.getGitConfig
|
|
giveup $ decodeBS $ quote qp $
|
|
"cannot import files from inside the working tree (use git annex add instead): "
|
|
<> quotedPaths inrepops
|
|
largematcher <- largeFilesMatcher
|
|
addunlockedmatcher <- addUnlockedMatcher
|
|
(commandAction . startLocal o addunlockedmatcher largematcher (duplicateMode o))
|
|
`withPathContents` importFiles o
|
|
seek o@(RemoteImportOptions {}) = startConcurrency commandStages $ do
|
|
r <- getParsed (importFromRemote o)
|
|
unlessM (Remote.isImportSupported r) $
|
|
giveup "That remote does not support imports."
|
|
subdir <- maybe
|
|
(pure Nothing)
|
|
(Just <$$> inRepo . toTopFilePath . toRawFilePath)
|
|
(importToSubDir o)
|
|
seekRemote r (importToBranch o) subdir (importContent o)
|
|
(checkGitIgnoreOption o)
|
|
(messageOption o)
|
|
|
|
startLocal :: ImportOptions -> AddUnlockedMatcher -> GetFileMatcher -> DuplicateMode -> (RawFilePath, RawFilePath) -> CommandStart
|
|
startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
|
|
ifM (liftIO $ isRegularFile <$> R.getSymbolicLinkStatus srcfile)
|
|
( starting "import" ai si pickaction
|
|
, stop
|
|
)
|
|
where
|
|
ai = ActionItemTreeFile destfile
|
|
si = SeekInput []
|
|
|
|
deletedup k = do
|
|
showNote $ UnquotedString $ "duplicate of " ++ serializeKey k
|
|
verifyExisting k destfile
|
|
( do
|
|
liftIO $ R.removeLink srcfile
|
|
next $ return True
|
|
, do
|
|
warning "Could not verify that the content is still present in the annex; not removing from the import location."
|
|
stop
|
|
)
|
|
reinject k = do
|
|
showNote "reinjecting"
|
|
Command.Reinject.perform srcfile k
|
|
importfile ld k = checkdestdir $ do
|
|
ignored <- checkIgnored (checkGitIgnoreOption o) destfile
|
|
if ignored
|
|
then do
|
|
warning $ "not importing " <> QuotedPath destfile <> " which is .gitignored (use --no-check-gitignore to override)"
|
|
stop
|
|
else do
|
|
existing <- liftIO (catchMaybeIO $ R.getSymbolicLinkStatus destfile)
|
|
case existing of
|
|
Nothing -> importfilechecked ld k
|
|
Just s
|
|
| isDirectory s -> notoverwriting "(is a directory)"
|
|
| isSymbolicLink s -> ifM (Annex.getRead Annex.force)
|
|
( do
|
|
liftIO $ removeWhenExistsWith R.removeLink destfile
|
|
importfilechecked ld k
|
|
, notoverwriting "(is a symlink)"
|
|
)
|
|
| otherwise -> ifM (Annex.getRead Annex.force)
|
|
( do
|
|
liftIO $ removeWhenExistsWith R.removeLink destfile
|
|
importfilechecked ld k
|
|
, notoverwriting "(use --force to override, or a duplication option such as --deduplicate to clean up)"
|
|
)
|
|
checkdestdir cont = do
|
|
let destdir = parentDir destfile
|
|
existing <- liftIO (catchMaybeIO $ R.getSymbolicLinkStatus destdir)
|
|
case existing of
|
|
Nothing -> cont
|
|
Just s
|
|
| isDirectory s -> cont
|
|
| otherwise -> do
|
|
warning $ "not importing " <> QuotedPath destfile <> " because " <> QuotedPath destdir <> " is not a directory"
|
|
stop
|
|
|
|
importfilechecked ld k = do
|
|
-- Move or copy the src file to the dest file.
|
|
-- The dest file is what will be ingested.
|
|
createWorkTreeDirectory (parentDir destfile)
|
|
unwind <- liftIO $ if mode == Duplicate || mode == SkipDuplicates
|
|
then do
|
|
void $ copyFileExternal CopyAllMetaData
|
|
(fromRawFilePath srcfile)
|
|
(fromRawFilePath destfile)
|
|
return $ removeWhenExistsWith R.removeLink destfile
|
|
else do
|
|
moveFile srcfile destfile
|
|
return $ moveFile destfile srcfile
|
|
-- Make sure that the dest file has its write permissions
|
|
-- removed; the src file normally already did, but may
|
|
-- have imported it from a filesystem that does not allow
|
|
-- removing write permissions, to a repo on a filesystem
|
|
-- that does.
|
|
when (lockingFile (lockDownConfig ld)) $ do
|
|
freezeContent destfile
|
|
checkLockedDownWritePerms destfile srcfile >>= \case
|
|
Just err -> do
|
|
liftIO unwind
|
|
qp <- coreQuotePath <$> Annex.getGitConfig
|
|
giveup (decodeBS $ quote qp err)
|
|
Nothing -> noop
|
|
-- Get the inode cache of the dest file. It should be
|
|
-- weakly the same as the originally locked down file's
|
|
-- inode cache. (Since the file may have been copied,
|
|
-- its inodes may not be the same.)
|
|
s <- liftIO $ R.getSymbolicLinkStatus destfile
|
|
newcache <- withTSDelta $ \d -> liftIO $ toInodeCache d destfile s
|
|
let unchanged = case (newcache, inodeCache (keySource ld)) of
|
|
(_, Nothing) -> True
|
|
(Just newc, Just c) | compareWeak c newc -> True
|
|
_ -> False
|
|
unless unchanged $
|
|
giveup "changed while it was being added"
|
|
-- The LockedDown needs to be adjusted, since the destfile
|
|
-- is what will be ingested.
|
|
let ld' = ld
|
|
{ keySource = KeySource
|
|
{ keyFilename = destfile
|
|
, contentLocation = destfile
|
|
, inodeCache = newcache
|
|
}
|
|
}
|
|
ifM (checkFileMatcher largematcher destfile)
|
|
( ingestAdd' nullMeterUpdate (Just ld') (Just k)
|
|
>>= maybe
|
|
stop
|
|
(\addedk -> next $ Command.Add.cleanup addedk True)
|
|
, Command.Add.addSmall (DryRun False) destfile s
|
|
)
|
|
notoverwriting why = do
|
|
warning $ "not overwriting existing " <> QuotedPath destfile <> " " <> UnquotedString why
|
|
stop
|
|
lockdown a = do
|
|
let mi = MatchingFile $ FileInfo
|
|
{ contentFile = srcfile
|
|
, matchFile = destfile
|
|
, matchKey = Nothing
|
|
}
|
|
lockingfile <- not <$> addUnlocked addunlockedmatcher mi True
|
|
-- Minimal lock down with no hard linking so nothing
|
|
-- has to be done to clean up from it.
|
|
let cfg = LockDownConfig
|
|
{ lockingFile = lockingfile
|
|
, hardlinkFileTmpDir = Nothing
|
|
-- The write perms of the file may not be able to be
|
|
-- removed, if it's being imported from a crippled
|
|
-- filesystem. So lockDown is asked to not check
|
|
-- the write perms. They will be checked later, after
|
|
-- the file gets copied into the repository.
|
|
, checkWritePerms = False
|
|
}
|
|
v <- lockDown cfg (fromRawFilePath srcfile)
|
|
case v of
|
|
Just ld -> do
|
|
backend <- chooseBackend destfile
|
|
k <- fst <$> genKey (keySource ld) nullMeterUpdate backend
|
|
a (ld, k)
|
|
Nothing -> stop
|
|
checkdup k dupa notdupa = ifM (isKnownKey k)
|
|
( dupa
|
|
, notdupa
|
|
)
|
|
pickaction = lockdown $ \(ld, k) -> case mode of
|
|
DeDuplicate -> checkdup k (deletedup k) (importfile ld k)
|
|
CleanDuplicates -> checkdup k
|
|
(deletedup k)
|
|
(skipbecause "not duplicate")
|
|
SkipDuplicates -> checkdup k
|
|
(skipbecause "duplicate")
|
|
(importfile ld k)
|
|
ReinjectDuplicates -> checkdup k
|
|
(reinject k)
|
|
(importfile ld k)
|
|
_ -> importfile ld k
|
|
skipbecause s = do
|
|
showNote (s <> "; skipping")
|
|
next (return True)
|
|
|
|
verifyExisting :: Key -> RawFilePath -> (CommandPerform, CommandPerform) -> CommandPerform
|
|
verifyExisting key destfile (yes, no) = do
|
|
-- Look up the numcopies setting for the file that it would be
|
|
-- imported to, if it were imported.
|
|
(needcopies, mincopies) <- getFileNumMinCopies destfile
|
|
|
|
(tocheck, preverified) <- verifiableCopies key []
|
|
verifyEnoughCopiesToDrop [] key Nothing Nothing needcopies mincopies [] preverified tocheck
|
|
(const yes) no
|
|
|
|
seekRemote :: Remote -> Branch -> Maybe TopFilePath -> Bool -> CheckGitIgnore -> [String] -> CommandSeek
|
|
seekRemote remote branch msubdir importcontent ci importmessages = do
|
|
importtreeconfig <- case msubdir of
|
|
Nothing -> return ImportTree
|
|
Just subdir ->
|
|
let mk tree = pure $ ImportSubTree subdir tree
|
|
in fromtrackingbranch Git.Ref.tree >>= \case
|
|
Just tree -> mk tree
|
|
Nothing -> inRepo (Git.Ref.tree branch) >>= \case
|
|
Just tree -> mk tree
|
|
Nothing -> giveup $ "Unable to find base tree for branch " ++ fromRef branch
|
|
|
|
trackingcommit <- fromtrackingbranch Git.Ref.sha
|
|
cmode <- annexCommitMode <$> Annex.getGitConfig
|
|
let importcommitconfig = ImportCommitConfig trackingcommit cmode importmessages'
|
|
let commitimport = commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig
|
|
|
|
importabletvar <- liftIO $ newTVarIO Nothing
|
|
void $ includeCommandAction (listContents remote importtreeconfig ci importabletvar)
|
|
liftIO (atomically (readTVar importabletvar)) >>= \case
|
|
Nothing -> return ()
|
|
Just importable -> importChanges remote importtreeconfig importcontent False importable >>= \case
|
|
ImportUnfinished -> warning $ UnquotedString $ concat
|
|
[ "Failed to import some files from "
|
|
, Remote.name remote
|
|
, ". Re-run command to resume import."
|
|
]
|
|
ImportFinished imported -> void $
|
|
includeCommandAction $
|
|
commitimport imported
|
|
where
|
|
importmessages'
|
|
| null importmessages = ["import from " ++ Remote.name remote]
|
|
| otherwise = importmessages
|
|
|
|
tb = mkRemoteTrackingBranch remote branch
|
|
|
|
fromtrackingbranch a = inRepo $ a (fromRemoteTrackingBranch tb)
|
|
|
|
listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, Remote.ByteSize))) -> CommandStart
|
|
listContents remote importtreeconfig ci tvar = starting "list" ai si $
|
|
listContents' remote importtreeconfig ci $ \importable -> do
|
|
liftIO $ atomically $ writeTVar tvar importable
|
|
next $ return True
|
|
where
|
|
ai = ActionItemOther (Just (UnquotedString (Remote.name remote)))
|
|
si = SeekInput []
|
|
|
|
listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a
|
|
listContents' remote importtreeconfig ci a =
|
|
makeImportMatcher remote >>= \case
|
|
Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case
|
|
Right importable -> a importable
|
|
Left e -> giveup $ "Unable to list contents of " ++ Remote.name remote ++ ": " ++ show e
|
|
Left err -> giveup $ unwords
|
|
[ "Cannot import from"
|
|
, Remote.name remote
|
|
, "because of a problem with its configuration:"
|
|
, err
|
|
]
|
|
|
|
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> Imported -> CommandStart
|
|
commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig imported =
|
|
starting "update" ai si $ do
|
|
importcommit <- buildImportCommit remote importtreeconfig importcommitconfig imported
|
|
next $ updateremotetrackingbranch importcommit
|
|
where
|
|
ai = ActionItemOther (Just $ UnquotedString $ fromRef $ fromRemoteTrackingBranch tb)
|
|
si = SeekInput []
|
|
-- Update the tracking branch. Done even when there
|
|
-- is nothing new to import, to make sure it exists.
|
|
updateremotetrackingbranch importcommit =
|
|
case importcommit <|> trackingcommit of
|
|
Just c -> do
|
|
setRemoteTrackingBranch tb c
|
|
return True
|
|
Nothing -> do
|
|
warning $ UnquotedString $ "Nothing to import and " ++ fromRef branch ++ " does not exist."
|
|
return False
|