-- git-annex/Command/Import.hs
{- git-annex command
-
- Copyright 2012-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE ApplicativeDo, OverloadedStrings #-}
2019-02-26 16:06:19 +00:00
module Command.Import where
import Command
import qualified Git
import qualified Annex
import qualified Command.Add
import qualified Command.Reinject
2019-02-26 16:06:19 +00:00
import qualified Types.Remote as Remote
import qualified Git.Ref
import Utility.CopyFile
import Utility.OptParse
import Backend
import Types.KeySource
import Annex.CheckIgnore
import Annex.NumCopies
import Annex.FileMatcher
import Annex.Ingest
import Annex.InodeSentinal
2019-02-26 16:06:19 +00:00
import Annex.Import
import Annex.Perms
import Annex.RemoteTrackingBranch
import Utility.InodeCache
import Logs.Location
2019-02-26 16:06:19 +00:00
import Git.FilePath
import Git.Types
import Types.Import
import Utility.Metered
import qualified Utility.RawFilePath as R
import Control.Concurrent.STM
import System.PosixCompat.Files (isDirectory, isSymbolicLink, isRegularFile)
{- The "import" command definition.
 -
 - The command is wrapped in notBareRepo, accepts the annex options
 - listed below, and hands the options parsed by optParser to seek.
 -}
cmd :: Command
cmd = notBareRepo (withAnnexOptions annexopts importcmd)
  where
    importcmd = command "import" SectionCommon
        "add a tree of files to the repository"
        (paramPaths ++ "|BRANCH")
        (seek <$$> optParser)

    annexopts =
        [ backendOption
        , jobsOption
        , jsonOptions
        , jsonProgressOption
        -- The file matching options only take effect when importing
        -- from a directory, never when importing from a special
        -- remote, which is why LimitDiskFiles is acceptable here.
        , fileMatchingOptions LimitDiskFiles
        ]
{- Options of the import command, as produced by optParser.
 -
 - LocalImportOptions is built when no remote to import from was
 - given; RemoteImportOptions is built when one was.
 -}
data ImportOptions
    = LocalImportOptions
        -- The command line parameters; seek treats them as the paths
        -- to import.
        { importFiles :: CmdParams
        -- What to do with files whose content is already known.
        , duplicateMode :: DuplicateMode
        -- Passed to checkIgnored before a file is imported.
        , checkGitIgnoreOption :: CheckGitIgnore
        }
    | RemoteImportOptions
        -- The remote to import from; resolved with getParsed in seek.
        { importFromRemote :: DeferredParse Remote
        -- The BRANCH part of the BRANCH[:SUBDIR] parameter.
        , importToBranch :: Branch
        -- The SUBDIR part of the BRANCH[:SUBDIR] parameter;
        -- Nothing when it was absent or empty.
        , importToSubDir :: Maybe FilePath
        -- Value of the invertable "content" switch.
        -- NOTE(review): presumably True unless the switch is
        -- inverted on the command line; confirm against
        -- invertableSwitch.
        , importContent :: Bool
        , checkGitIgnoreOption :: CheckGitIgnore
        -- Every value given with --message/-m, in order.
        , messageOption :: [String]
        }
{- Parses the command line of the import command.
 -
 - When a remote to import from was specified, exactly one parameter
 - of the form BRANCH[:SUBDIR] is expected and RemoteImportOptions is
 - produced; any other number of parameters results in giveup.
 - Otherwise the parameters are kept as-is in LocalImportOptions.
 -
 - The module enables ApplicativeDo, so this do block has to stay in
 - applicative form: no statement may use a value bound by an earlier
 - one, and everything that combines the results lives in the final
 - pure expression.
 -}
optParser :: CmdParamsDesc -> Parser ImportOptions
optParser desc = do
    ps <- cmdParams desc
    mfromremote <- optional $ mkParseRemoteOption <$> parseFromOption
    content <- invertableSwitch "content" True
        ( help "do not get contents of imported files"
        )
    dupmode <- fromMaybe Default <$> optional duplicateModeParser
    ic <- Command.Add.checkGitIgnoreSwitch
    message <- many (strOption
        ( long "message" <> short 'm' <> metavar "MSG"
        <> help "commit message"
        ))
    pure $ case mfromremote of
        Nothing -> LocalImportOptions ps dupmode ic
        Just r -> case ps of
            [bs] ->
                -- Split BRANCH[:SUBDIR] at the ':' separator.
                let (branch, subdir) = separate (== ':') bs
                in RemoteImportOptions r
                    (Ref (encodeBS branch))
                    (if null subdir then Nothing else Just subdir)
                    content
                    ic
                    message
            _ -> giveup "expected BRANCH[:SUBDIR]"
{- How a local import treats a source file, in particular one whose
 - key is already known to the repository (see pickaction in
 - startLocal).
 -}
data DuplicateMode
    -- No duplicate option given: every file is imported, by moving
    -- the source file.
    = Default
    -- --duplicate: every file is imported, by copying the source
    -- file rather than moving it.
    | Duplicate
    -- --deduplicate: source files with known content are deleted
    -- after verification; other files are imported.
    | DeDuplicate
    -- --clean-duplicates: source files with known content are deleted
    -- after verification; other files are skipped.
    | CleanDuplicates
    -- --skip-duplicates: source files with known content are skipped;
    -- other files are imported by copying.
    | SkipDuplicates
    -- --reinject-duplicates: source files with known content are
    -- reinjected; other files are imported.
    | ReinjectDuplicates
    deriving (Eq)
{- Parses one of the mutually exclusive duplicate handling flags.
 -
 - The parser fails when none of the flags is present; optParser wraps
 - it in optional and falls back to Default in that case.
 -}
duplicateModeParser :: Parser DuplicateMode
duplicateModeParser =
    flag' Duplicate
        ( long "duplicate"
        <> help "do not delete source files"
        )
    <|> flag' DeDuplicate
        ( long "deduplicate"
        <> help "delete source files whose content was imported before"
        )
    <|> flag' CleanDuplicates
        ( long "clean-duplicates"
        <> help "delete duplicate source files (import nothing)"
        )
    <|> flag' SkipDuplicates
        ( long "skip-duplicates"
        <> help "import only new files (do not delete source files)"
        )
    <|> flag' ReinjectDuplicates
        ( long "reinject-duplicates"
        <> help "import new files, and reinject the content of files that were imported before"
        )
{- Seeks the work to do for the import command.
 -
 - Local import: refuses (giveup) when any of the given paths lies
 - inside the repository's working tree, otherwise runs startLocal on
 - the contents of the given paths.
 -
 - Remote import: refuses (giveup) when the remote does not support
 - imports, otherwise resolves the optional subdirectory to a
 - TopFilePath and delegates to seekRemote.
 -}
seek :: ImportOptions -> CommandSeek
seek o@(LocalImportOptions {}) = startConcurrency commandStages $ do
    repopath <- liftIO . absPath =<< fromRepo Git.repoPath
    inrepops <- liftIO $ filter (dirContains repopath)
        <$> mapM (absPath . toRawFilePath) (importFiles o)
    unless (null inrepops) $ do
        qp <- coreQuotePath <$> Annex.getGitConfig
        giveup $ decodeBS $ quote qp $
            "cannot import files from inside the working tree (use git annex add instead): "
                <> quotedPaths inrepops
    largematcher <- largeFilesMatcher
    addunlockedmatcher <- addUnlockedMatcher
    (commandAction . startLocal o addunlockedmatcher largematcher (duplicateMode o))
        `withPathContents` importFiles o
seek o@(RemoteImportOptions {}) = startConcurrency commandStages $ do
    r <- getParsed (importFromRemote o)
    unlessM (Remote.isImportSupported r) $
        giveup "That remote does not support imports."
    -- Nothing stays Nothing; a given subdirectory is converted to a
    -- path relative to the top of the repository.
    subdir <- traverse
        (inRepo . toTopFilePath . toRawFilePath)
        (importToSubDir o)
    seekRemote r (importToBranch o) subdir (importContent o)
        (checkGitIgnoreOption o)
        (messageOption o)
{- Starts the import of a single file from outside the repository.
 -
 - srcfile is the file to import and destfile is where it is imported
 - to. Only regular files (as reported by lstat of srcfile) are
 - processed; anything else is silently skipped.
 -
 - The source file is locked down and its key generated first; the
 - DuplicateMode then decides, based on whether that key is already
 - known, if the file is imported, deleted, reinjected or skipped.
 -}
startLocal :: ImportOptions -> AddUnlockedMatcher -> GetFileMatcher -> DuplicateMode -> (RawFilePath, RawFilePath) -> CommandStart
startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
    ifM (liftIO $ isRegularFile <$> R.getSymbolicLinkStatus srcfile)
        ( starting "import" ai si pickaction
        , stop
        )
  where
    ai = ActionItemTreeFile destfile
    si = SeekInput []

    -- Delete the source file, but only once enough copies of the
    -- already known content have been verified to exist.
    deletedup k = do
        showNote $ UnquotedString $ "duplicate of " ++ serializeKey k
        verifyExisting k destfile
            ( do
                liftIO $ R.removeLink srcfile
                next $ return True
            , do
                warning "Could not verify that the content is still present in the annex; not removing from the import location."
                stop
            )

    reinject k = do
        showNote "reinjecting"
        Command.Reinject.perform srcfile k

    -- Import the file unless the destination is gitignored or is
    -- already occupied. An existing directory is never overwritten;
    -- an existing symlink or other file is only replaced when --force
    -- is in effect.
    importfile ld k = checkdestdir $ do
        ignored <- checkIgnored (checkGitIgnoreOption o) destfile
        if ignored
            then do
                warning $ "not importing " <> QuotedPath destfile <> " which is .gitignored (use --no-check-gitignore to override)"
                stop
            else do
                existing <- liftIO (catchMaybeIO $ R.getSymbolicLinkStatus destfile)
                case existing of
                    Nothing -> importfilechecked ld k
                    Just s
                        | isDirectory s -> notoverwriting "(is a directory)"
                        | isSymbolicLink s -> overwriteifforced "(is a symlink)"
                        | otherwise -> overwriteifforced "(use --force to override, or a duplication option such as --deduplicate to clean up)"
      where
        -- Replace the existing destination when --force is set,
        -- otherwise warn with the given explanation.
        overwriteifforced why = ifM (Annex.getRead Annex.force)
            ( do
                liftIO $ removeWhenExistsWith R.removeLink destfile
                importfilechecked ld k
            , notoverwriting why
            )

    -- Only continue when the parent of the destination either does
    -- not exist yet or is a directory.
    checkdestdir cont = do
        let destdir = parentDir destfile
        existing <- liftIO (catchMaybeIO $ R.getSymbolicLinkStatus destdir)
        case existing of
            Nothing -> cont
            Just s
                | isDirectory s -> cont
                | otherwise -> do
                    warning $ "not importing " <> QuotedPath destfile <> " because " <> QuotedPath destdir <> " is not a directory"
                    stop

    importfilechecked ld k = do
        -- Move or copy the src file to the dest file.
        -- The dest file is what will be ingested.
        createWorkTreeDirectory (parentDir destfile)
        -- unwind is the IO action that undoes the move or copy.
        -- NOTE(review): the result of copyFileExternal is discarded
        -- here, so a failed copy is only noticed by the later checks
        -- on destfile; confirm that this is intended.
        unwind <- liftIO $ if mode == Duplicate || mode == SkipDuplicates
            then do
                void $ copyFileExternal CopyAllMetaData
                    (fromRawFilePath srcfile)
                    (fromRawFilePath destfile)
                return $ removeWhenExistsWith R.removeLink destfile
            else do
                moveFile srcfile destfile
                return $ moveFile destfile srcfile
        -- Make sure that the dest file has its write permissions
        -- removed; the src file normally already did, but may
        -- have imported it from a filesystem that does not allow
        -- removing write permissions, to a repo on a filesystem
        -- that does.
        when (lockingFile (lockDownConfig ld)) $ do
            freezeContent destfile
            checkLockedDownWritePerms destfile srcfile >>= \case
                Just err -> do
                    liftIO unwind
                    qp <- coreQuotePath <$> Annex.getGitConfig
                    giveup (decodeBS $ quote qp err)
                Nothing -> noop
        -- Get the inode cache of the dest file. It should be
        -- weakly the same as the originally locked down file's
        -- inode cache. (Since the file may have been copied,
        -- its inodes may not be the same.)
        s <- liftIO $ R.getSymbolicLinkStatus destfile
        newcache <- withTSDelta $ \d -> liftIO $ toInodeCache d destfile s
        let unchanged = case (newcache, inodeCache (keySource ld)) of
                (_, Nothing) -> True
                (Just newc, Just c) | compareWeak c newc -> True
                _ -> False
        -- NOTE(review): unlike the write permission failure above,
        -- this path gives up without running unwind; confirm that
        -- leaving destfile in place is intended.
        unless unchanged $
            giveup "changed while it was being added"
        -- The LockedDown needs to be adjusted, since the destfile
        -- is what will be ingested.
        let ld' = ld
                { keySource = KeySource
                    { keyFilename = destfile
                    , contentLocation = destfile
                    , inodeCache = newcache
                    }
                }
        -- Files matched by the large files matcher are ingested into
        -- the annex; the rest are handed to Command.Add.addSmall.
        ifM (checkFileMatcher NoLiveUpdate largematcher destfile)
            ( ingestAdd' nullMeterUpdate (Just ld') (Just k)
                >>= maybe
                    stop
                    (\addedk -> next $ Command.Add.cleanup addedk True)
            , Command.Add.addSmall (DryRun False) destfile s
            )

    notoverwriting why = do
        warning $ "not overwriting existing " <> QuotedPath destfile <> " " <> UnquotedString why
        stop

    -- Lock down the source file, generate its key, and pass both to
    -- the continuation. Stops when the lock down yields nothing.
    lockdown a = do
        let mi = MatchingFile $ FileInfo
                { contentFile = srcfile
                , matchFile = destfile
                , matchKey = Nothing
                }
        lockingfile <- not <$> addUnlocked addunlockedmatcher mi True
        -- Minimal lock down with no hard linking so nothing
        -- has to be done to clean up from it.
        let cfg = LockDownConfig
                { lockingFile = lockingfile
                , hardlinkFileTmpDir = Nothing
                -- The write perms of the file may not be able to be
                -- removed, if it's being imported from a crippled
                -- filesystem. So lockDown is asked to not check
                -- the write perms. They will be checked later, after
                -- the file gets copied into the repository.
                , checkWritePerms = False
                }
        v <- lockDown cfg (fromRawFilePath srcfile)
        case v of
            Just ld -> do
                backend <- chooseBackend destfile
                k <- fst <$> genKey (keySource ld) nullMeterUpdate backend
                a (ld, k)
            Nothing -> stop

    -- Run dupa when the key is already known, notdupa otherwise.
    checkdup k dupa notdupa = ifM (isKnownKey k)
        ( dupa
        , notdupa
        )

    pickaction = lockdown $ \(ld, k) -> case mode of
        DeDuplicate -> checkdup k (deletedup k) (importfile ld k)
        CleanDuplicates -> checkdup k
            (deletedup k)
            (skipbecause "not duplicate")
        SkipDuplicates -> checkdup k
            (skipbecause "duplicate")
            (importfile ld k)
        ReinjectDuplicates -> checkdup k
            (reinject k)
            (importfile ld k)
        _ -> importfile ld k

    skipbecause s = do
        showNote (s <> "; skipping")
        next (return True)
{- Checks that enough copies of the key can be verified to exist,
 - using the same verification that is done before dropping content.
 -
 - Runs the first action of the pair when verification succeeds and
 - the second one when it does not.
 -}
verifyExisting :: Key -> RawFilePath -> (CommandPerform, CommandPerform) -> CommandPerform
verifyExisting key destfile (yes, no) = do
    -- Look up the numcopies setting for the file that it would be
    -- imported to, if it were imported.
    (needcopies, mincopies) <- getFileNumMinCopies destfile
    (tocheck, preverified) <- verifiableCopies key []
    verifyEnoughCopiesToDrop [] key Nothing Nothing needcopies mincopies [] preverified tocheck
        (const yes) no
{- Imports from a remote into its remote tracking branch.
 -
 - The contents of the remote are listed first, through listContents,
 - which stores the result in a TVar. When a listing was stored, the
 - changes are imported and, if the import finished, committed with
 - commitRemote. An unfinished import only produces a warning, so that
 - re-running the command can resume it.
 -
 - When importing into a subdirectory, a base tree is needed: the tree
 - of the remote tracking branch is used when available, otherwise the
 - tree of the branch itself; giveup when neither exists.
 -
 - When no commit messages are supplied, a default message naming the
 - remote is used.
 -}
seekRemote :: Remote -> Branch -> Maybe TopFilePath -> Bool -> CheckGitIgnore -> [String] -> CommandSeek
seekRemote remote branch msubdir importcontent ci importmessages = do
    importtreeconfig <- case msubdir of
        Nothing -> return ImportTree
        Just subdir ->
            let mk tree = pure $ ImportSubTree subdir tree
            in fromtrackingbranch Git.Ref.tree >>= \case
                Just tree -> mk tree
                Nothing -> inRepo (Git.Ref.tree branch) >>= \case
                    Just tree -> mk tree
                    Nothing -> giveup $ "Unable to find base tree for branch " ++ fromRef branch

    trackingcommit <- fromtrackingbranch Git.Ref.sha
    cmode <- annexCommitMode <$> Annex.getGitConfig
    let importcommitconfig = ImportCommitConfig trackingcommit cmode importmessages'
    let commitimport = commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig

    -- listContents runs as a command action and leaves its result in
    -- this TVar; it stays Nothing when no listing was stored.
    importabletvar <- liftIO $ newTVarIO Nothing
    void $ includeCommandAction (listContents remote importtreeconfig ci importabletvar)
    liftIO (atomically (readTVar importabletvar)) >>= \case
        Nothing -> return ()
        Just importable -> importChanges remote importtreeconfig importcontent False importable >>= \case
            ImportUnfinished -> warning $ UnquotedString $ concat
                [ "Failed to import some files from "
                , Remote.name remote
                , ". Re-run command to resume import."
                ]
            ImportFinished imported -> void $
                includeCommandAction $
                    commitimport imported
  where
    importmessages'
        | null importmessages = ["import from " ++ Remote.name remote]
        | otherwise = importmessages

    tb = mkRemoteTrackingBranch remote branch

    -- Run a git query against the remote tracking branch's ref.
    fromtrackingbranch a = inRepo $ a (fromRemoteTrackingBranch tb)
{- Command action that lists the importable contents of a remote and
 - stores the result in the given TVar for the caller to read once the
 - action has run.
 -}
listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, Remote.ByteSize))) -> CommandStart
listContents remote importtreeconfig ci tvar = starting "list" ai si $
    listContents' remote importtreeconfig ci $ \importable -> do
        liftIO $ atomically $ writeTVar tvar importable
        next $ return True
  where
    ai = ActionItemOther (Just (UnquotedString (Remote.name remote)))
    si = SeekInput []
{- Builds the import matcher of the remote, lists the remote's
 - importable contents with it, and passes the listing on to the
 - continuation.
 -
 - A matcher that cannot be built, or a non-async exception thrown
 - while listing, ends in giveup with a message naming the remote.
 -}
listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a
listContents' remote importtreeconfig ci a =
    makeImportMatcher remote >>= either badconfig list
  where
    -- List the contents using the matcher, trapping failures.
    list matcher =
        tryNonAsync (getImportableContents remote importtreeconfig ci matcher)
            >>= either listfailed a

    listfailed e = giveup $
        "Unable to list contents of " ++ Remote.name remote ++ ": " ++ show e

    badconfig err = giveup $ unwords
        [ "Cannot import from"
        , Remote.name remote
        , "because of a problem with its configuration:"
        , err
        ]
{- Command action that builds the import commit for what was imported
 - and points the remote tracking branch at it.
 -
 - When no new commit was built, the previous tracking commit is used
 - instead. The action fails with a warning when neither exists.
 -}
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> Imported -> CommandStart
commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig imported =
    starting "update" ai si $ do
        importcommit <- buildImportCommit remote importtreeconfig importcommitconfig imported
        next $ updateremotetrackingbranch importcommit
  where
    ai = ActionItemOther (Just $ UnquotedString $ fromRef $ fromRemoteTrackingBranch tb)
    si = SeekInput []
    -- Update the tracking branch. Done even when there
    -- is nothing new to import, to make sure it exists.
    updateremotetrackingbranch importcommit =
        case importcommit <|> trackingcommit of
            Just c -> do
                setRemoteTrackingBranch tb c
                return True
            Nothing -> do
                warning $ UnquotedString $ "Nothing to import and " ++ fromRef branch ++ " does not exist."
                return False