855 lines
30 KiB
Haskell
855 lines
30 KiB
Haskell
{- git-annex import from remotes
|
|
-
|
|
- Copyright 2019-2021 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
-}
|
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module Annex.Import (
|
|
ImportTreeConfig(..),
|
|
ImportCommitConfig(..),
|
|
buildImportCommit,
|
|
buildImportTrees,
|
|
recordImportTree,
|
|
canImportKeys,
|
|
importKeys,
|
|
makeImportMatcher,
|
|
getImportableContents,
|
|
) where
|
|
|
|
import Annex.Common
|
|
import Types.Import
|
|
import qualified Types.Remote as Remote
|
|
import Git.Types
|
|
import Git.Tree
|
|
import Git.Sha
|
|
import Git.FilePath
|
|
import Git.History
|
|
import qualified Git.Ref
|
|
import qualified Git.Branch
|
|
import qualified Annex
|
|
import Annex.Link
|
|
import Annex.LockFile
|
|
import Annex.Content
|
|
import Annex.Export
|
|
import Annex.RemoteTrackingBranch
|
|
import Annex.HashObject
|
|
import Annex.Transfer
|
|
import Annex.CheckIgnore
|
|
import Annex.VectorClock
|
|
import Command
|
|
import Backend
|
|
import Types.Key
|
|
import Types.KeySource
|
|
import Messages.Progress
|
|
import Utility.DataUnits
|
|
import Utility.Metered
|
|
import Logs.Export
|
|
import Logs.Location
|
|
import Logs.PreferredContent
|
|
import Types.FileMatcher
|
|
import Annex.FileMatcher
|
|
import qualified Utility.Matcher
|
|
import qualified Database.Export as Export
|
|
import qualified Database.ContentIdentifier as CIDDb
|
|
import qualified Logs.ContentIdentifier as CIDLog
|
|
import Backend.Utilities
|
|
|
|
import Control.Concurrent.STM
|
|
import qualified Data.Map.Strict as M
|
|
import qualified Data.Set as S
|
|
import qualified System.FilePath.Posix.ByteString as Posix
|
|
import qualified System.FilePath.ByteString as P
|
|
|
|
{- Configures how to build an import tree. -}
|
|
data ImportTreeConfig
|
|
= ImportTree
|
|
-- ^ Import the tree as-is from the remote.
|
|
| ImportSubTree TopFilePath Sha
|
|
-- ^ Import a tree from the remote and graft it into a subdirectory
|
|
-- of the existing tree whose Sha is provided, replacing anything
|
|
-- that was there before.
|
|
deriving (Show)
|
|
|
|
{- Configures how to build an import commit. -}
|
|
data ImportCommitConfig = ImportCommitConfig
|
|
{ importCommitTracking :: Maybe Sha
|
|
-- ^ Current commit on the remote tracking branch.
|
|
, importCommitMode :: Git.Branch.CommitMode
|
|
, importCommitMessage :: String
|
|
}
|
|
|
|
{- Buils a commit for an import from a special remote.
|
|
-
|
|
- When there are no changes to make (importCommitTracking
|
|
- already matches what was imported), returns Nothing.
|
|
-
|
|
- After importing from a remote, exporting the same thing back to the
|
|
- remote should be a no-op. So, the export log and database are
|
|
- updated to reflect the imported tree.
|
|
-
|
|
- This does not download any content from a remote. But since it needs the
|
|
- Key of imported files to be known, its caller will have to first download
|
|
- new files in order to generate keys for them.
|
|
-}
|
|
buildImportCommit
|
|
:: Remote
|
|
-> ImportTreeConfig
|
|
-> ImportCommitConfig
|
|
-> ImportableContentsChunkable Annex (Either Sha Key)
|
|
-> Annex (Maybe Ref)
|
|
buildImportCommit remote importtreeconfig importcommitconfig importable =
|
|
case importCommitTracking importcommitconfig of
|
|
Nothing -> go Nothing
|
|
Just trackingcommit -> inRepo (Git.Ref.tree trackingcommit) >>= \case
|
|
Nothing -> go Nothing
|
|
Just _ -> go (Just trackingcommit)
|
|
where
|
|
go trackingcommit = do
|
|
(imported, updatestate) <- recordImportTree remote importtreeconfig importable
|
|
buildImportCommit' remote importcommitconfig trackingcommit imported >>= \case
|
|
Just finalcommit -> do
|
|
updatestate
|
|
return (Just finalcommit)
|
|
Nothing -> return Nothing
|
|
|
|
{- Builds a tree for an import from a special remote.
|
|
-
|
|
- Also returns an action that can be used to update
|
|
- all the other state to record the import.
|
|
-}
|
|
recordImportTree
|
|
:: Remote
|
|
-> ImportTreeConfig
|
|
-> ImportableContentsChunkable Annex (Either Sha Key)
|
|
-> Annex (History Sha, Annex ())
|
|
recordImportTree remote importtreeconfig importable = do
|
|
imported@(History finaltree _) <- buildImportTrees basetree subdir importable
|
|
return (imported, updatestate finaltree)
|
|
where
|
|
basetree = case importtreeconfig of
|
|
ImportTree -> emptyTree
|
|
ImportSubTree _ sha -> sha
|
|
subdir = case importtreeconfig of
|
|
ImportTree -> Nothing
|
|
ImportSubTree dir _ -> Just dir
|
|
|
|
updatestate finaltree = do
|
|
importedtree <- case subdir of
|
|
Nothing -> pure finaltree
|
|
Just dir ->
|
|
let subtreeref = Ref $
|
|
fromRef' finaltree
|
|
<> ":"
|
|
<> getTopFilePath dir
|
|
in fromMaybe emptyTree
|
|
<$> inRepo (Git.Ref.tree subtreeref)
|
|
updateexportdb importedtree
|
|
oldexport <- updateexportlog importedtree
|
|
updatelocationlog oldexport importedtree
|
|
|
|
updateexportdb importedtree = do
|
|
db <- Export.openDb (Remote.uuid remote)
|
|
Export.writeLockDbWhile db $ do
|
|
prevtree <- liftIO $ fromMaybe emptyTree
|
|
<$> Export.getExportTreeCurrent db
|
|
when (importedtree /= prevtree) $ do
|
|
Export.updateExportDb db prevtree importedtree
|
|
liftIO $ Export.recordExportTreeCurrent db importedtree
|
|
Export.closeDb db
|
|
|
|
updateexportlog importedtree = do
|
|
oldexport <- getExport (Remote.uuid remote)
|
|
recordExport (Remote.uuid remote) importedtree $ ExportChange
|
|
{ oldTreeish = exportedTreeishes oldexport
|
|
, newTreeish = importedtree
|
|
}
|
|
return oldexport
|
|
|
|
-- downloadImport takes care of updating the location log
|
|
-- for the local repo when keys are downloaded, and also updates
|
|
-- the location log for the remote for keys that are present in it.
|
|
-- That leaves updating the location log for the remote for keys
|
|
-- that have had the last copy of their content removed from it.
|
|
--
|
|
-- This must run after the export database has been updated
|
|
-- and flushed to disk, so it can query it.
|
|
updatelocationlog oldexport finaltree = do
|
|
let stillpresent db k = liftIO $ not . null
|
|
<$> Export.getExportedLocation db k
|
|
let updater db moldkey _newkey _ = case moldkey of
|
|
Just oldkey | not (isGitShaKey oldkey) ->
|
|
unlessM (stillpresent db oldkey) $
|
|
logChange oldkey (Remote.uuid remote) InfoMissing
|
|
_ -> noop
|
|
-- When the remote is versioned, it still contains keys
|
|
-- that are not present in the new tree.
|
|
unless (Remote.versionedExport (Remote.exportActions remote)) $ do
|
|
db <- Export.openDb (Remote.uuid remote)
|
|
forM_ (exportedTreeishes oldexport) $ \oldtree ->
|
|
Export.runExportDiffUpdater updater db oldtree finaltree
|
|
Export.closeDb db
|
|
|
|
buildImportCommit' :: Remote -> ImportCommitConfig -> Maybe Sha -> History Sha -> Annex (Maybe Sha)
|
|
buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History ti _) =
|
|
case mtrackingcommit of
|
|
Nothing -> Just <$> mkcommitsunconnected imported
|
|
Just trackingcommit -> do
|
|
-- Get history of tracking branch to at most
|
|
-- one more level deep than what was imported,
|
|
-- so we'll have enough history to compare,
|
|
-- but not spend too much time getting it.
|
|
let maxdepth = succ importeddepth
|
|
inRepo (getHistoryToDepth maxdepth trackingcommit)
|
|
>>= go trackingcommit
|
|
where
|
|
go _ Nothing = Just <$> mkcommitsunconnected imported
|
|
go trackingcommit (Just h)
|
|
-- If the tracking branch head is a merge commit
|
|
-- and one side of the merge matches the history,
|
|
-- nothing new needs to be committed.
|
|
| t == ti && any sametodepth (S.toList s) = return Nothing
|
|
-- If the tracking branch matches the history,
|
|
-- nothing new needs to be committed.
|
|
-- (This is unlikely to happen.)
|
|
| sametodepth h' = return Nothing
|
|
| otherwise = do
|
|
importedcommit <- case getRemoteTrackingBranchImportHistory h of
|
|
Nothing -> mkcommitsunconnected imported
|
|
Just oldimported@(History oldhc _)
|
|
| importeddepth == 1 ->
|
|
mkcommitconnected imported oldimported
|
|
| otherwise -> do
|
|
let oldimportedtrees = mapHistory historyCommitTree oldimported
|
|
mknewcommits oldhc oldimportedtrees imported
|
|
ti' <- addBackExportExcluded remote ti
|
|
Just <$> makeRemoteTrackingBranchMergeCommit'
|
|
trackingcommit importedcommit ti'
|
|
where
|
|
h'@(History t s) = mapHistory historyCommitTree h
|
|
|
|
importeddepth = historyDepth imported
|
|
|
|
sametodepth b = imported == truncateHistoryToDepth importeddepth b
|
|
|
|
mkcommit parents tree = inRepo $ Git.Branch.commitTree
|
|
(importCommitMode importcommitconfig)
|
|
(importCommitMessage importcommitconfig)
|
|
parents
|
|
tree
|
|
|
|
-- Start a new history of import commits, not connected to any
|
|
-- prior import commits.
|
|
mkcommitsunconnected (History importedtree hs) = do
|
|
parents <- mapM mkcommitsunconnected (S.toList hs)
|
|
mkcommit parents importedtree
|
|
|
|
-- Commit the new history connected with the old history.
|
|
-- Used when the import is not versioned, so the history depth is 1.
|
|
mkcommitconnected (History importedtree _) (History oldhc _) = do
|
|
let parents = [historyCommit oldhc]
|
|
mkcommit parents importedtree
|
|
|
|
-- Reuse the commits from the old imported History when possible.
|
|
mknewcommits oldhc old new@(History importedtree hs)
|
|
| old == new = return $ historyCommit oldhc
|
|
| otherwise = do
|
|
parents <- mapM (mknewcommits oldhc old) (S.toList hs)
|
|
mkcommit parents importedtree
|
|
|
|
{- Builds a history of git trees reflecting the ImportableContents.
|
|
-
|
|
- When a subdir is provided, imported tree is grafted into the basetree at
|
|
- that location, replacing any object that was there.
|
|
-}
|
|
buildImportTrees
|
|
:: Ref
|
|
-> Maybe TopFilePath
|
|
-> ImportableContentsChunkable Annex (Either Sha Key)
|
|
-> Annex (History Sha)
|
|
buildImportTrees basetree msubdir (ImportableContentsComplete importable) = do
|
|
repo <- Annex.gitRepo
|
|
withMkTreeHandle repo $ buildImportTrees' basetree msubdir importable
|
|
buildImportTrees basetree msubdir importable@(ImportableContentsChunked {}) = do
|
|
repo <- Annex.gitRepo
|
|
withMkTreeHandle repo $ \hdl ->
|
|
History
|
|
<$> go hdl
|
|
<*> buildImportTreesHistory basetree msubdir
|
|
(importableHistoryComplete importable) hdl
|
|
where
|
|
go hdl = do
|
|
tree <- gochunks [] (importableContentsChunk importable) hdl
|
|
importtree <- liftIO $ recordTree' hdl tree
|
|
graftImportTree basetree msubdir importtree hdl
|
|
|
|
gochunks l c hdl = do
|
|
let subdir = importChunkSubDir $ importableContentsSubDir c
|
|
-- Full directory prefix where the sub tree is located.
|
|
let fullprefix = asTopFilePath $ case msubdir of
|
|
Nothing -> subdir
|
|
Just d -> getTopFilePath d Posix.</> subdir
|
|
Tree ts <- convertImportTree (Just fullprefix) $
|
|
map (\(p, i) -> (mkImportLocation p, i))
|
|
(importableContentsSubTree c)
|
|
-- Record this subtree before getting next chunk, this
|
|
-- avoids buffering all the chunks into memory.
|
|
tc <- liftIO $ recordSubTree hdl $
|
|
NewSubTree (asTopFilePath subdir) ts
|
|
importableContentsNextChunk c >>= \case
|
|
Nothing -> return (Tree (tc:l))
|
|
Just c' -> gochunks (tc:l) c' hdl
|
|
|
|
buildImportTrees'
|
|
:: Ref
|
|
-> Maybe TopFilePath
|
|
-> ImportableContents (Either Sha Key)
|
|
-> MkTreeHandle
|
|
-> Annex (History Sha)
|
|
buildImportTrees' basetree msubdir importable hdl = History
|
|
<$> buildImportTree basetree msubdir (importableContents importable) hdl
|
|
<*> buildImportTreesHistory basetree msubdir (importableHistory importable) hdl
|
|
|
|
buildImportTree
|
|
:: Ref
|
|
-> Maybe TopFilePath
|
|
-> [(ImportLocation, Either Sha Key)]
|
|
-> MkTreeHandle
|
|
-> Annex Sha
|
|
buildImportTree basetree msubdir ls hdl = do
|
|
importtree <- liftIO . recordTree' hdl =<< convertImportTree msubdir ls
|
|
graftImportTree basetree msubdir importtree hdl
|
|
|
|
graftImportTree
|
|
:: Ref
|
|
-> Maybe TopFilePath
|
|
-> Sha
|
|
-> MkTreeHandle
|
|
-> Annex Sha
|
|
graftImportTree basetree msubdir tree hdl = case msubdir of
|
|
Nothing -> return tree
|
|
Just subdir -> inRepo $ \repo ->
|
|
graftTree' tree subdir basetree repo hdl
|
|
|
|
convertImportTree :: Maybe TopFilePath -> [(ImportLocation, Either Sha Key)] -> Annex Tree
|
|
convertImportTree msubdir ls = treeItemsToTree <$> mapM mktreeitem ls
|
|
where
|
|
mktreeitem (loc, v) = case v of
|
|
Right k -> do
|
|
relf <- fromRepo $ fromTopFilePath topf
|
|
symlink <- calcRepo $ gitAnnexLink relf k
|
|
linksha <- hashSymlink symlink
|
|
return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha
|
|
Left sha ->
|
|
return $ TreeItem treepath (fromTreeItemType TreeFile) sha
|
|
where
|
|
lf = fromImportLocation loc
|
|
treepath = asTopFilePath lf
|
|
topf = asTopFilePath $
|
|
maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir
|
|
|
|
buildImportTreesHistory
|
|
:: Ref
|
|
-> Maybe TopFilePath
|
|
-> [ImportableContents (Either Sha Key)]
|
|
-> MkTreeHandle
|
|
-> Annex (S.Set (History Sha))
|
|
buildImportTreesHistory basetree msubdir history hdl = S.fromList
|
|
<$> mapM (\ic -> buildImportTrees' basetree msubdir ic hdl) history
|
|
|
|
canImportKeys :: Remote -> Bool -> Bool
|
|
canImportKeys remote importcontent =
|
|
importcontent || isJust (Remote.importKey ia)
|
|
where
|
|
ia = Remote.importActions remote
|
|
|
|
{- Downloads all new ContentIdentifiers, or when importcontent is False,
|
|
- generates Keys without downloading.
|
|
-
|
|
- Generates either a Key or a git Sha, depending on annex.largefiles.
|
|
- But when importcontent is False, it cannot match on annex.largefiles
|
|
- (or generate a git Sha), so always generates Keys.
|
|
-
|
|
- Supports concurrency when enabled.
|
|
-
|
|
- If it fails on any file, the whole thing fails with Nothing,
|
|
- but it will resume where it left off.
|
|
-
|
|
- Note that, when a ContentIdentifier has been imported before,
|
|
- generates the same thing that was imported before, so annex.largefiles
|
|
- is not reapplied.
|
|
-}
|
|
importKeys
|
|
:: Remote
|
|
-> ImportTreeConfig
|
|
-> Bool
|
|
-> Bool
|
|
-> ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)
|
|
-> Annex (Maybe (ImportableContentsChunkable Annex (Either Sha Key)))
|
|
importKeys remote importtreeconfig importcontent thirdpartypopulated importablecontents = do
|
|
unless (canImportKeys remote importcontent) $
|
|
giveup "This remote does not support importing without downloading content."
|
|
-- This map is used to remember content identifiers that
|
|
-- were just imported, before they have necessarily been
|
|
-- stored in the database. This way, if the same content
|
|
-- identifier appears multiple times in the
|
|
-- importablecontents (eg when it has a history),
|
|
-- they will only be imported once.
|
|
cidmap <- liftIO $ newTVarIO M.empty
|
|
-- When concurrency is enabled, this set is needed to
|
|
-- avoid two threads both importing the same content identifier.
|
|
importing <- liftIO $ newTVarIO S.empty
|
|
withciddb $ \db -> do
|
|
CIDDb.needsUpdateFromLog db
|
|
>>= maybe noop (CIDDb.updateFromLog db)
|
|
(prepclock (run cidmap importing db))
|
|
where
|
|
-- When not importing content, reuse the same vector
|
|
-- clock for all state that's recorded. This can save
|
|
-- a little bit of disk space. Individual file downloads
|
|
-- while downloading take too long for this optimisation
|
|
-- to be safe to do.
|
|
prepclock a
|
|
| importcontent = a
|
|
| otherwise = reuseVectorClockWhile a
|
|
|
|
withciddb a = do
|
|
cidlck <- calcRepo' gitAnnexContentIdentifierLock
|
|
withExclusiveLock cidlck $
|
|
bracket CIDDb.openDb CIDDb.closeDb a
|
|
|
|
run cidmap importing db = do
|
|
largematcher <- largeFilesMatcher
|
|
case importablecontents of
|
|
ImportableContentsComplete ic ->
|
|
go False largematcher cidmap importing db ic >>= return . \case
|
|
Nothing -> Nothing
|
|
Just v -> Just $ ImportableContentsComplete v
|
|
ImportableContentsChunked {} -> do
|
|
c <- gochunked db (importableContentsChunk importablecontents)
|
|
gohistory largematcher cidmap importing db (importableHistoryComplete importablecontents) >>= return . \case
|
|
Nothing -> Nothing
|
|
Just h -> Just $ ImportableContentsChunked
|
|
{ importableContentsChunk = c
|
|
, importableHistoryComplete = h
|
|
}
|
|
|
|
go oldversion largematcher cidmap importing db (ImportableContents l h) = do
|
|
jobs <- forM l $ \i ->
|
|
if thirdpartypopulated
|
|
then Left <$> thirdpartypopulatedimport db i
|
|
else startimport cidmap importing db i oldversion largematcher
|
|
l' <- liftIO $ forM jobs $
|
|
either pure (atomically . takeTMVar)
|
|
if any isNothing l'
|
|
then return Nothing
|
|
else gohistory largematcher cidmap importing db h >>= return . \case
|
|
Nothing -> Nothing
|
|
Just h' -> Just $ ImportableContents (catMaybes l') h'
|
|
|
|
gohistory largematcher cidmap importing db h = do
|
|
h' <- mapM (go True largematcher cidmap importing db) h
|
|
if any isNothing h'
|
|
then return Nothing
|
|
else return $ Just $ catMaybes h'
|
|
|
|
gochunked db c
|
|
-- Downloading cannot be done when chunked, since only
|
|
-- the first chunk is processed before returning.
|
|
| importcontent = error "importKeys does not support downloading chunked import"
|
|
-- Chunked import is currently only used by thirdpartypopulated
|
|
-- remotes.
|
|
| not thirdpartypopulated = error "importKeys does not support chunked import when not thirdpartypopulated"
|
|
| otherwise = do
|
|
l <- forM (importableContentsSubTree c) $ \(loc, i) -> do
|
|
let loc' = importableContentsChunkFullLocation (importableContentsSubDir c) loc
|
|
thirdpartypopulatedimport db (loc', i) >>= return . \case
|
|
Just (_loc, k) -> Just (loc, k)
|
|
Nothing -> Nothing
|
|
return $ ImportableContentsChunk
|
|
{ importableContentsSubDir = importableContentsSubDir c
|
|
, importableContentsSubTree = catMaybes l
|
|
, importableContentsNextChunk =
|
|
importableContentsNextChunk c >>= \case
|
|
Nothing -> return Nothing
|
|
Just c' -> withciddb $ \db' ->
|
|
prepclock $
|
|
Just <$> gochunked db' c'
|
|
}
|
|
|
|
waitstart importing cid = liftIO $ atomically $ do
|
|
s <- readTVar importing
|
|
if S.member cid s
|
|
then retry
|
|
else writeTVar importing $ S.insert cid s
|
|
|
|
signaldone importing cid = liftIO $ atomically $ do
|
|
s <- readTVar importing
|
|
writeTVar importing $ S.delete cid s
|
|
|
|
startimport cidmap importing db i@(loc, (cid, _sz)) oldversion largematcher = getcidkey cidmap db cid >>= \case
|
|
(k:ks) ->
|
|
-- If the same content was imported before
|
|
-- yielding multiple different keys, it's not clear
|
|
-- which is best to use this time, so pick the
|
|
-- first in the list. But, if any of them is a
|
|
-- git sha, use it, because the content must
|
|
-- be included in the git repo then.
|
|
let v = case mapMaybe keyGitSha (k:ks) of
|
|
(sha:_) -> Left sha
|
|
[] -> Right k
|
|
in return $ Left $ Just (loc, v)
|
|
[] -> do
|
|
job <- liftIO $ newEmptyTMVarIO
|
|
let ai = ActionItemOther (Just (fromRawFilePath (fromImportLocation loc)))
|
|
let si = SeekInput []
|
|
let importaction = starting ("import " ++ Remote.name remote) ai si $ do
|
|
when oldversion $
|
|
showNote "old version"
|
|
tryNonAsync (importordownload cidmap db i largematcher) >>= \case
|
|
Left e -> next $ do
|
|
warning (show e)
|
|
liftIO $ atomically $
|
|
putTMVar job Nothing
|
|
return False
|
|
Right r -> next $ do
|
|
liftIO $ atomically $
|
|
putTMVar job r
|
|
return True
|
|
commandAction $ bracket_
|
|
(waitstart importing cid)
|
|
(signaldone importing cid)
|
|
importaction
|
|
return (Right job)
|
|
|
|
thirdpartypopulatedimport db (loc, (cid, sz)) =
|
|
case Remote.importKey ia of
|
|
Nothing -> return Nothing
|
|
Just importkey ->
|
|
tryNonAsync (importkey loc cid sz nullMeterUpdate) >>= \case
|
|
Right (Just k) -> do
|
|
recordcidkey' db cid k
|
|
logChange k (Remote.uuid remote) InfoPresent
|
|
return $ Just (loc, Right k)
|
|
Right Nothing -> return Nothing
|
|
Left e -> do
|
|
warning (show e)
|
|
return Nothing
|
|
|
|
importordownload cidmap db (loc, (cid, sz)) largematcher= do
|
|
f <- locworktreefile loc
|
|
matcher <- largematcher f
|
|
-- When importing a key is supported, always use it rather
|
|
-- than downloading and retrieving a key, to avoid
|
|
-- generating trees with different keys for the same content.
|
|
let act = if importcontent
|
|
then case Remote.importKey ia of
|
|
Nothing -> dodownload
|
|
Just _ -> if Utility.Matcher.introspect matchNeedsFileContent matcher
|
|
then dodownload
|
|
else doimport
|
|
else doimport
|
|
act cidmap db (loc, (cid, sz)) f matcher
|
|
|
|
doimport cidmap db (loc, (cid, sz)) f matcher =
|
|
case Remote.importKey ia of
|
|
Nothing -> error "internal" -- checked earlier
|
|
Just importkey -> do
|
|
when (Utility.Matcher.introspect matchNeedsFileContent matcher) $
|
|
giveup "annex.largefiles configuration examines file contents, so cannot import without content."
|
|
let mi = MatchingInfo ProvidedInfo
|
|
{ providedFilePath = Just f
|
|
, providedKey = Nothing
|
|
, providedFileSize = Just sz
|
|
, providedMimeType = Nothing
|
|
, providedMimeEncoding = Nothing
|
|
, providedLinkType = Nothing
|
|
}
|
|
let bwlimit = remoteAnnexBwLimit (Remote.gitconfig remote)
|
|
islargefile <- checkMatcher' matcher mi mempty
|
|
metered Nothing sz bwlimit $ const $ if islargefile
|
|
then doimportlarge importkey cidmap db loc cid sz f
|
|
else doimportsmall cidmap db loc cid sz
|
|
|
|
doimportlarge importkey cidmap db loc cid sz f p =
|
|
tryNonAsync importer >>= \case
|
|
Right (Just (k, True)) -> return $ Just (loc, Right k)
|
|
Right _ -> return Nothing
|
|
Left e -> do
|
|
warning (show e)
|
|
return Nothing
|
|
where
|
|
importer = do
|
|
-- Don't display progress when generating
|
|
-- key, if the content will later be
|
|
-- downloaded, which is a more expensive
|
|
-- operation generally.
|
|
let p' = if importcontent then nullMeterUpdate else p
|
|
importkey loc cid sz p' >>= \case
|
|
Nothing -> return Nothing
|
|
Just k -> checkSecureHashes k >>= \case
|
|
Nothing -> do
|
|
recordcidkey cidmap db cid k
|
|
logChange k (Remote.uuid remote) InfoPresent
|
|
if importcontent
|
|
then getcontent k
|
|
else return (Just (k, True))
|
|
Just msg -> giveup (msg ++ " to import")
|
|
|
|
getcontent :: Key -> Annex (Maybe (Key, Bool))
|
|
getcontent k = do
|
|
let af = AssociatedFile (Just f)
|
|
let downloader p' tmpfile = do
|
|
_ <- Remote.retrieveExportWithContentIdentifier
|
|
ia loc [cid] (fromRawFilePath tmpfile)
|
|
(Left k)
|
|
(combineMeterUpdate p' p)
|
|
ok <- moveAnnex k af tmpfile
|
|
when ok $
|
|
logStatus k InfoPresent
|
|
return (Just (k, ok))
|
|
checkDiskSpaceToGet k Nothing $
|
|
notifyTransfer Download af $
|
|
download' (Remote.uuid remote) k af Nothing stdRetry $ \p' ->
|
|
withTmp k $ downloader p'
|
|
|
|
-- The file is small, so is added to git, so while importing
|
|
-- without content does not retrieve annexed files, it does
|
|
-- need to retrieve this file.
|
|
doimportsmall cidmap db loc cid sz p = do
|
|
let downloader tmpfile = do
|
|
(k, _) <- Remote.retrieveExportWithContentIdentifier
|
|
ia loc [cid] (fromRawFilePath tmpfile)
|
|
(Right (mkkey tmpfile))
|
|
p
|
|
case keyGitSha k of
|
|
Just sha -> do
|
|
recordcidkey cidmap db cid k
|
|
return sha
|
|
Nothing -> error "internal"
|
|
checkDiskSpaceToGet tmpkey Nothing $
|
|
withTmp tmpkey $ \tmpfile ->
|
|
tryNonAsync (downloader tmpfile) >>= \case
|
|
Right sha -> return $ Just (loc, Left sha)
|
|
Left e -> do
|
|
warning (show e)
|
|
return Nothing
|
|
where
|
|
tmpkey = importKey cid sz
|
|
mkkey tmpfile = gitShaKey <$> hashFile tmpfile
|
|
|
|
dodownload cidmap db (loc, (cid, sz)) f matcher = do
|
|
let af = AssociatedFile (Just f)
|
|
let downloader tmpfile p = do
|
|
(k, _) <- Remote.retrieveExportWithContentIdentifier
|
|
ia loc [cid] (fromRawFilePath tmpfile)
|
|
(Right (mkkey tmpfile))
|
|
p
|
|
case keyGitSha k of
|
|
Nothing -> do
|
|
ok <- moveAnnex k af tmpfile
|
|
when ok $ do
|
|
recordcidkey cidmap db cid k
|
|
logStatus k InfoPresent
|
|
logChange k (Remote.uuid remote) InfoPresent
|
|
return (Right k, ok)
|
|
Just sha -> do
|
|
recordcidkey cidmap db cid k
|
|
return (Left sha, True)
|
|
let rundownload tmpfile p = tryNonAsync (downloader tmpfile p) >>= \case
|
|
Right (v, True) -> return $ Just (loc, v)
|
|
Right (_, False) -> return Nothing
|
|
Left e -> do
|
|
warning (show e)
|
|
return Nothing
|
|
let bwlimit = remoteAnnexBwLimit (Remote.gitconfig remote)
|
|
checkDiskSpaceToGet tmpkey Nothing $
|
|
notifyTransfer Download af $
|
|
download' (Remote.uuid remote) tmpkey af Nothing stdRetry $ \p ->
|
|
withTmp tmpkey $ \tmpfile ->
|
|
metered (Just p) tmpkey bwlimit $
|
|
const (rundownload tmpfile)
|
|
where
|
|
tmpkey = importKey cid sz
|
|
|
|
mkkey tmpfile = do
|
|
let mi = MatchingFile FileInfo
|
|
{ matchFile = f
|
|
, contentFile = tmpfile
|
|
, matchKey = Nothing
|
|
}
|
|
islargefile <- checkMatcher' matcher mi mempty
|
|
if islargefile
|
|
then do
|
|
backend <- chooseBackend f
|
|
let ks = KeySource
|
|
{ keyFilename = f
|
|
, contentLocation = tmpfile
|
|
, inodeCache = Nothing
|
|
}
|
|
fst <$> genKey ks nullMeterUpdate backend
|
|
else gitShaKey <$> hashFile tmpfile
|
|
|
|
ia = Remote.importActions remote
|
|
|
|
locworktreefile loc = fromRepo $ fromTopFilePath $ asTopFilePath $
|
|
case importtreeconfig of
|
|
ImportTree -> fromImportLocation loc
|
|
ImportSubTree subdir _ ->
|
|
getTopFilePath subdir P.</> fromImportLocation loc
|
|
|
|
getcidkey cidmap db cid = liftIO $
|
|
CIDDb.getContentIdentifierKeys db rs cid >>= \case
|
|
[] -> atomically $
|
|
maybeToList . M.lookup cid <$> readTVar cidmap
|
|
l -> return l
|
|
|
|
recordcidkey cidmap db cid k = do
|
|
liftIO $ atomically $ modifyTVar' cidmap $
|
|
M.insert cid k
|
|
recordcidkey' db cid k
|
|
recordcidkey' db cid k = do
|
|
liftIO $ CIDDb.recordContentIdentifier db rs cid k
|
|
CIDLog.recordContentIdentifier rs cid k
|
|
|
|
rs = Remote.remoteStateHandle remote
|
|
|
|
{- Temporary key used for import of a ContentIdentifier while downloading
|
|
- content, before generating its real key. -}
|
|
importKey :: ContentIdentifier -> Integer -> Key
|
|
importKey (ContentIdentifier cid) size = mkKey $ \k -> k
|
|
{ keyName = genKeyName (decodeBS cid)
|
|
, keyVariety = OtherKey "CID"
|
|
, keySize = Just size
|
|
}
|
|
|
|
{-- Export omits non-preferred content from the tree stored on the
|
|
-- remote. So the import will normally have that content
|
|
-- omitted (unless something else added files with the same names to the
|
|
-- special remote).
|
|
--
|
|
-- That presents a problem: Merging the imported tree would result
|
|
-- in deletion of the files that were excluded from export.
|
|
-- To avoid that happening, this adds them back to the imported tree.
|
|
--}
|
|
addBackExportExcluded :: Remote -> Sha -> Annex Sha
|
|
addBackExportExcluded remote importtree =
|
|
getExportExcluded (Remote.uuid remote) >>= \case
|
|
[] -> return importtree
|
|
excludedlist -> inRepo $
|
|
adjustTree
|
|
-- don't remove any
|
|
(pure . Just)
|
|
excludedlist
|
|
-- if something was imported with the same
|
|
-- name as a file that was previously
|
|
-- excluded from import, use what was imported
|
|
(\imported _excluded -> imported)
|
|
[]
|
|
importtree
|
|
|
|
{- Match the preferred content of the remote at import time.
|
|
-
|
|
- Only keyless tokens are supported, because the keys are not known
|
|
- until an imported file is downloaded, which is too late to bother
|
|
- excluding it from an import.
|
|
-}
|
|
makeImportMatcher :: Remote -> Annex (Either String (FileMatcher Annex))
|
|
makeImportMatcher r = load preferredContentKeylessTokens >>= \case
|
|
Nothing -> return $ Right matchAll
|
|
Just (Right v) -> return $ Right v
|
|
Just (Left err) -> load preferredContentTokens >>= \case
|
|
Just (Left err') -> return $ Left err'
|
|
_ -> return $ Left $
|
|
"The preferred content expression contains terms that cannot be checked when importing: " ++ err
|
|
where
|
|
load t = M.lookup (Remote.uuid r) . fst <$> preferredRequiredMapsLoad' t
|
|
|
|
{- Gets the ImportableContents from the remote.
|
|
-
|
|
- Filters out any paths that include a ".git" component, because git does
|
|
- not allow storing ".git" in a git repository. While it is possible to
|
|
- write a git tree that contains that, git will complain and refuse to
|
|
- check it out.
|
|
-
|
|
- Filters out new things not matching the FileMatcher or that are
|
|
- gitignored. However, files that are already in git get imported
|
|
- regardless. (Similar to how git add behaves on gitignored files.)
|
|
- This avoids creating a remote tracking branch that, when merged,
|
|
- would delete the files.
|
|
-
|
|
- Throws exception if unable to contact the remote.
|
|
- Returns Nothing when there is no change since last time.
|
|
-}
|
|
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
|
getImportableContents r importtreeconfig ci matcher = do
|
|
Remote.listImportableContents (Remote.importActions r) >>= \case
|
|
Just (ImportableContentsComplete ic) -> do
|
|
dbhandle <- opendbhandle
|
|
Just . ImportableContentsComplete
|
|
<$> filterunwanted dbhandle ic
|
|
Just (c@(ImportableContentsChunked {})) -> do
|
|
dbhandle <- opendbhandle
|
|
Just <$> filterunwantedchunked dbhandle c
|
|
Nothing -> return Nothing
|
|
where
|
|
filterunwanted dbhandle ic = ImportableContents
|
|
<$> filterM (wanted dbhandle) (importableContents ic)
|
|
<*> mapM (filterunwanted dbhandle) (importableHistory ic)
|
|
|
|
filterunwantedchunked dbhandle c = ImportableContentsChunked
|
|
<$> filterunwantedchunk dbhandle (importableContentsChunk c)
|
|
<*> mapM (filterunwanted dbhandle) (importableHistoryComplete c)
|
|
|
|
filterunwantedchunk dbhandle c = ImportableContentsChunk
|
|
<$> pure (importableContentsSubDir c)
|
|
<*> filterM (wantedunder dbhandle (importableContentsSubDir c))
|
|
(importableContentsSubTree c)
|
|
<*> pure (
|
|
importableContentsNextChunk c >>= \case
|
|
Nothing -> return Nothing
|
|
Just c' -> Just <$> filterunwantedchunk dbhandle c'
|
|
)
|
|
|
|
opendbhandle = Export.openDb (Remote.uuid r)
|
|
|
|
wanted dbhandle (loc, (_cid, sz))
|
|
| ingitdir = pure False
|
|
| otherwise =
|
|
isknown <||> (matches <&&> notignored)
|
|
where
|
|
-- Checks, from least to most expensive.
|
|
ingitdir = ".git" `elem` Posix.splitDirectories (fromImportLocation loc)
|
|
matches = matchesImportLocation matcher loc sz
|
|
isknown = isKnownImportLocation dbhandle loc
|
|
notignored = notIgnoredImportLocation importtreeconfig ci loc
|
|
|
|
wantedunder dbhandle root (loc, v) =
|
|
wanted dbhandle (importableContentsChunkFullLocation root loc, v)
|
|
|
|
isKnownImportLocation :: Export.ExportHandle -> ImportLocation -> Annex Bool
|
|
isKnownImportLocation dbhandle loc = liftIO $
|
|
not . null <$> Export.getExportTreeKey dbhandle loc
|
|
|
|
matchesImportLocation :: FileMatcher Annex -> ImportLocation -> Integer -> Annex Bool
|
|
matchesImportLocation matcher loc sz = checkMatcher' matcher mi mempty
|
|
where
|
|
mi = MatchingInfo $ ProvidedInfo
|
|
{ providedFilePath = Just (fromImportLocation loc)
|
|
, providedKey = Nothing
|
|
, providedFileSize = Just sz
|
|
, providedMimeType = Nothing
|
|
, providedMimeEncoding = Nothing
|
|
, providedLinkType = Nothing
|
|
}
|
|
|
|
notIgnoredImportLocation :: ImportTreeConfig -> CheckGitIgnore -> ImportLocation -> Annex Bool
|
|
notIgnoredImportLocation importtreeconfig ci loc = not <$> checkIgnored ci f
|
|
where
|
|
f = case importtreeconfig of
|
|
ImportSubTree dir _ ->
|
|
getTopFilePath dir P.</> fromImportLocation loc
|
|
ImportTree ->
|
|
fromImportLocation loc
|