2019-02-21 21:32:59 +00:00
|
|
|
{- git-annex import from remotes
|
|
|
|
-
|
|
|
|
- Copyright 2019 Joey Hess <id@joeyh.name>
|
|
|
|
-
|
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
2019-02-27 17:15:02 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
2019-02-26 19:25:28 +00:00
|
|
|
|
2019-02-23 19:47:55 +00:00
|
|
|
module Annex.Import (
|
2019-03-04 20:02:56 +00:00
|
|
|
importTree,
|
2019-02-23 19:47:55 +00:00
|
|
|
ImportTreeConfig(..),
|
|
|
|
ImportCommitConfig(..),
|
|
|
|
buildImportCommit,
|
2019-02-26 19:25:28 +00:00
|
|
|
buildImportTrees,
|
|
|
|
downloadImport
|
2019-02-23 19:47:55 +00:00
|
|
|
) where
|
2019-02-21 21:32:59 +00:00
|
|
|
|
|
|
|
import Annex.Common
|
|
|
|
import Types.Import
|
2019-02-26 17:11:25 +00:00
|
|
|
import qualified Types.Remote as Remote
|
2019-02-21 21:32:59 +00:00
|
|
|
import Git.Types
|
|
|
|
import Git.Tree
|
2019-02-22 16:41:17 +00:00
|
|
|
import Git.Sha
|
2019-02-21 21:32:59 +00:00
|
|
|
import Git.FilePath
|
2019-04-24 19:13:07 +00:00
|
|
|
import Git.History
|
2019-02-22 16:41:17 +00:00
|
|
|
import qualified Git.Ref
|
|
|
|
import qualified Git.Branch
|
|
|
|
import qualified Annex
|
2019-02-21 21:32:59 +00:00
|
|
|
import Annex.Link
|
2019-02-22 16:41:17 +00:00
|
|
|
import Annex.LockFile
|
2019-02-27 17:15:02 +00:00
|
|
|
import Annex.Content
|
2019-03-01 17:26:15 +00:00
|
|
|
import Annex.Export
|
2019-05-01 17:13:00 +00:00
|
|
|
import Annex.RemoteTrackingBranch
|
2019-03-08 16:33:44 +00:00
|
|
|
import Command
|
2019-02-27 17:15:02 +00:00
|
|
|
import Backend
|
2019-03-04 20:02:56 +00:00
|
|
|
import Config
|
2019-02-27 17:15:02 +00:00
|
|
|
import Types.Key
|
|
|
|
import Types.KeySource
|
2019-03-08 16:43:03 +00:00
|
|
|
import Messages.Progress
|
2019-02-27 17:15:02 +00:00
|
|
|
import Utility.DataUnits
|
2019-02-22 16:41:17 +00:00
|
|
|
import Logs.Export
|
2019-02-27 17:58:03 +00:00
|
|
|
import Logs.Location
|
2019-02-26 19:25:28 +00:00
|
|
|
import qualified Database.Export as Export
|
2019-03-06 22:04:30 +00:00
|
|
|
import qualified Database.ContentIdentifier as CIDDb
|
|
|
|
import qualified Logs.ContentIdentifier as CIDLog
|
2019-02-26 19:25:28 +00:00
|
|
|
|
|
|
|
import Control.Concurrent.STM
|
|
|
|
import qualified Data.Map.Strict as M
|
2019-03-08 16:33:44 +00:00
|
|
|
import qualified Data.Set as S
|
2019-03-04 20:02:56 +00:00
|
|
|
|
2019-02-23 19:47:55 +00:00
|
|
|
{- Configures how to build an import tree. -}
|
|
|
|
data ImportTreeConfig
|
|
|
|
= ImportTree
|
|
|
|
-- ^ Import the tree as-is from the remote.
|
|
|
|
| ImportSubTree TopFilePath Sha
|
|
|
|
-- ^ Import a tree from the remote and graft it into a subdirectory
|
|
|
|
-- of the existing tree whose Sha is provided, replacing anything
|
|
|
|
-- that was there before.
|
|
|
|
deriving (Show)
|
|
|
|
|
|
|
|
{- Configures how to build an import commit. -}
|
|
|
|
data ImportCommitConfig = ImportCommitConfig
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
{ importCommitTracking :: Maybe Sha
|
|
|
|
-- ^ Current commit on the remote tracking branch.
|
2019-02-23 19:47:55 +00:00
|
|
|
, importCommitMode :: Git.Branch.CommitMode
|
|
|
|
, importCommitMessage :: String
|
|
|
|
}
|
|
|
|
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
{- Buils a commit for an import from a special remote.
|
2019-02-21 21:32:59 +00:00
|
|
|
-
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
- When there are no changes to make (importCommitTracking
|
|
|
|
- already matches what was imported), returns Nothing.
|
2019-02-23 19:47:55 +00:00
|
|
|
-
|
2019-02-21 21:32:59 +00:00
|
|
|
- After importing from a remote, exporting the same thing back to the
|
2019-02-22 16:41:17 +00:00
|
|
|
- remote should be a no-op. So, the export log and database are
|
|
|
|
- updated to reflect the imported tree.
|
2019-02-21 21:32:59 +00:00
|
|
|
-
|
2019-02-23 19:47:55 +00:00
|
|
|
- This does not download any content from a remote. But since it needs the
|
2019-02-22 16:41:17 +00:00
|
|
|
- Key of imported files to be known, its caller will have to first download
|
2019-02-21 21:32:59 +00:00
|
|
|
- new files in order to generate keys for them.
|
|
|
|
-}
|
|
|
|
buildImportCommit
|
2019-02-22 16:41:17 +00:00
|
|
|
:: Remote
|
2019-02-23 19:47:55 +00:00
|
|
|
-> ImportTreeConfig
|
|
|
|
-> ImportCommitConfig
|
2019-02-21 21:32:59 +00:00
|
|
|
-> ImportableContents Key
|
2019-02-26 17:11:25 +00:00
|
|
|
-> Annex (Maybe Ref)
|
2019-02-23 19:47:55 +00:00
|
|
|
buildImportCommit remote importtreeconfig importcommitconfig importable =
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
case importCommitTracking importcommitconfig of
|
2019-04-24 19:13:07 +00:00
|
|
|
Nothing -> go Nothing
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
Just trackingcommit -> inRepo (Git.Ref.tree trackingcommit) >>= \case
|
2019-04-24 19:13:07 +00:00
|
|
|
Nothing -> go Nothing
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
Just _ -> go (Just trackingcommit)
|
2019-02-21 21:32:59 +00:00
|
|
|
where
|
2019-02-23 19:47:55 +00:00
|
|
|
basetree = case importtreeconfig of
|
|
|
|
ImportTree -> emptyTree
|
|
|
|
ImportSubTree _ sha -> sha
|
|
|
|
subdir = case importtreeconfig of
|
|
|
|
ImportTree -> Nothing
|
|
|
|
ImportSubTree dir _ -> Just dir
|
|
|
|
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
go trackingcommit = do
|
2019-02-23 19:47:55 +00:00
|
|
|
imported@(History finaltree _) <-
|
|
|
|
buildImportTrees basetree subdir importable
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
buildImportCommit' importcommitconfig trackingcommit imported >>= \case
|
|
|
|
Just finalcommit -> do
|
2019-03-11 17:44:23 +00:00
|
|
|
updatestate finaltree
|
2019-02-26 17:11:25 +00:00
|
|
|
return (Just finalcommit)
|
2019-04-24 19:13:07 +00:00
|
|
|
Nothing -> return Nothing
|
2019-03-11 17:44:23 +00:00
|
|
|
|
|
|
|
updatestate committedtree = do
|
|
|
|
importedtree <- case subdir of
|
|
|
|
Nothing -> pure committedtree
|
|
|
|
Just dir ->
|
|
|
|
let subtreeref = Ref $
|
|
|
|
fromRef committedtree ++ ":" ++ getTopFilePath dir
|
|
|
|
in fromMaybe emptyTree
|
|
|
|
<$> inRepo (Git.Ref.tree subtreeref)
|
|
|
|
updateexportdb importedtree
|
|
|
|
oldexport <- updateexportlog importedtree
|
|
|
|
updatelocationlog oldexport importedtree
|
2019-04-24 19:13:07 +00:00
|
|
|
|
2019-03-07 19:59:44 +00:00
|
|
|
updateexportdb importedtree = do
|
|
|
|
db <- Export.openDb (Remote.uuid remote)
|
|
|
|
Export.writeLockDbWhile db $ do
|
2019-02-22 16:41:17 +00:00
|
|
|
prevtree <- liftIO $ fromMaybe emptyTree
|
2019-02-26 19:25:28 +00:00
|
|
|
<$> Export.getExportTreeCurrent db
|
2019-02-22 16:41:17 +00:00
|
|
|
when (importedtree /= prevtree) $ do
|
2019-02-27 19:29:41 +00:00
|
|
|
Export.updateExportDb db prevtree importedtree
|
2019-02-26 19:25:28 +00:00
|
|
|
liftIO $ Export.recordExportTreeCurrent db importedtree
|
2019-03-07 19:59:44 +00:00
|
|
|
Export.closeDb db
|
2019-02-23 19:47:55 +00:00
|
|
|
|
2019-02-22 16:41:17 +00:00
|
|
|
updateexportlog importedtree = do
|
2019-03-01 17:26:15 +00:00
|
|
|
oldexport <- getExport (Remote.uuid remote)
|
2019-02-26 17:11:25 +00:00
|
|
|
recordExport (Remote.uuid remote) $ ExportChange
|
2019-03-01 17:26:15 +00:00
|
|
|
{ oldTreeish = exportedTreeishes oldexport
|
2019-02-22 16:41:17 +00:00
|
|
|
, newTreeish = importedtree
|
|
|
|
}
|
2019-03-01 17:26:15 +00:00
|
|
|
return oldexport
|
|
|
|
|
|
|
|
-- downloadImport takes care of updating the location log
|
|
|
|
-- for the local repo when keys are downloaded, and also updates
|
|
|
|
-- the location log for the remote for keys that are present in it.
|
|
|
|
-- That leaves updating the location log for the remote for keys
|
|
|
|
-- that have had the last copy of their content removed from it.
|
|
|
|
--
|
|
|
|
-- This must run after the export database has been updated
|
|
|
|
-- and flushed to disk, so it can query it.
|
|
|
|
updatelocationlog oldexport finaltree = do
|
|
|
|
let stillpresent db k = liftIO $ not . null
|
|
|
|
<$> Export.getExportedLocation db k
|
|
|
|
let updater db oldkey _newkey _ = case oldkey of
|
|
|
|
Just (AnnexKey k) -> unlessM (stillpresent db k) $
|
|
|
|
logChange k (Remote.uuid remote) InfoMissing
|
|
|
|
Just (GitKey _) -> noop
|
|
|
|
Nothing -> noop
|
|
|
|
db <- Export.openDb (Remote.uuid remote)
|
|
|
|
forM_ (exportedTreeishes oldexport) $ \oldtree ->
|
|
|
|
Export.runExportDiffUpdater updater db oldtree finaltree
|
|
|
|
Export.closeDb db
|
2019-02-21 21:32:59 +00:00
|
|
|
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
buildImportCommit' :: ImportCommitConfig -> Maybe Sha -> History Sha -> Annex (Maybe Sha)
|
|
|
|
buildImportCommit' importcommitconfig mtrackingcommit imported@(History ti _) =
|
|
|
|
case mtrackingcommit of
|
|
|
|
Nothing -> Just <$> mkcommits imported
|
|
|
|
Just trackingcommit -> do
|
|
|
|
-- Get history of tracking branch to at most
|
|
|
|
-- one more level deep, so sametodepth will
|
|
|
|
-- always have enough history to compare,
|
|
|
|
-- but unncessary history won't be loaded.
|
|
|
|
let maxdepth = succ (historyDepth imported)
|
|
|
|
inRepo (getHistoryToDepth maxdepth trackingcommit)
|
|
|
|
>>= go trackingcommit
|
2019-04-26 14:17:02 +00:00
|
|
|
where
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
go _ Nothing = Just <$> mkcommits imported
|
|
|
|
go trackingcommit (Just h)
|
2019-05-01 16:37:54 +00:00
|
|
|
-- If the tracking branch matches the history,
|
|
|
|
-- nothing new needs to be committed.
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
| sametodepth imported h' = return Nothing
|
2019-05-01 16:37:54 +00:00
|
|
|
-- If the tracking branch head is a merge commit
|
|
|
|
-- with a tree that matches the head of the history,
|
|
|
|
-- and one side of the merge matches the history,
|
|
|
|
-- nothing new needs to be committed.
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
| t == ti && any (sametodepth imported) (S.toList s) = return Nothing
|
2019-05-01 16:37:54 +00:00
|
|
|
-- Make a merge commit, with one side being the import, and
|
|
|
|
-- the other being the trackingcommit. This way the history
|
|
|
|
-- as imported is preserved, even when it differs from the
|
|
|
|
-- history as exported, and git merge will understand that
|
|
|
|
-- the history is connected.
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
| otherwise = do
|
2019-05-01 17:13:00 +00:00
|
|
|
importedcommit <- mkcommits imported
|
|
|
|
Just <$> makeRemoteTrackingBranchMergeCommit'
|
|
|
|
trackingcommit importedcommit ti
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
where
|
|
|
|
h'@(History t s) = mapHistory historyCommitTree h
|
2019-04-23 20:34:19 +00:00
|
|
|
|
|
|
|
sametodepth a b = a == truncateHistoryToDepth (historyDepth a) b
|
|
|
|
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
mkcommits (History importedtree hs) = do
|
|
|
|
parents <- mapM mkcommits (S.toList hs)
|
|
|
|
mkcommit parents importedtree
|
|
|
|
mkcommit parents tree = inRepo $ Git.Branch.commitTree
|
|
|
|
(importCommitMode importcommitconfig)
|
|
|
|
(importCommitMessage importcommitconfig)
|
|
|
|
parents
|
|
|
|
tree
|
|
|
|
|
2019-02-22 16:41:17 +00:00
|
|
|
{- Builds a history of git trees reflecting the ImportableContents.
|
|
|
|
-
|
|
|
|
- When a subdir is provided, imported tree is grafted into the basetree at
|
|
|
|
- that location, replacing any object that was there.
|
|
|
|
-}
|
2019-02-21 21:32:59 +00:00
|
|
|
buildImportTrees
|
2019-02-22 16:41:17 +00:00
|
|
|
:: Ref
|
|
|
|
-> Maybe TopFilePath
|
2019-02-21 21:32:59 +00:00
|
|
|
-> ImportableContents Key
|
|
|
|
-> Annex (History Sha)
|
2019-02-22 16:41:17 +00:00
|
|
|
buildImportTrees basetree msubdir importable = History
|
2019-04-23 19:08:37 +00:00
|
|
|
<$> (buildtree (importableContents importable) =<< Annex.gitRepo)
|
|
|
|
<*> buildhistory
|
2019-02-21 21:32:59 +00:00
|
|
|
where
|
2019-04-23 19:08:37 +00:00
|
|
|
buildhistory = S.fromList
|
|
|
|
<$> mapM (buildImportTrees basetree msubdir)
|
|
|
|
(importableHistory importable)
|
|
|
|
|
|
|
|
buildtree ls repo = withMkTreeHandle repo $ \hdl -> do
|
2019-02-22 16:41:17 +00:00
|
|
|
importtree <- liftIO . recordTree' hdl
|
|
|
|
. treeItemsToTree
|
|
|
|
=<< mapM mktreeitem ls
|
|
|
|
case msubdir of
|
|
|
|
Nothing -> return importtree
|
|
|
|
Just subdir -> liftIO $
|
|
|
|
graftTree' importtree subdir basetree repo hdl
|
2019-04-23 19:08:37 +00:00
|
|
|
|
2019-02-21 21:32:59 +00:00
|
|
|
mktreeitem (loc, k) = do
|
|
|
|
let lf = fromImportLocation loc
|
2019-02-22 16:41:17 +00:00
|
|
|
let treepath = asTopFilePath lf
|
|
|
|
let topf = asTopFilePath $
|
|
|
|
maybe lf (\sd -> getTopFilePath sd </> lf) msubdir
|
2019-02-21 21:32:59 +00:00
|
|
|
relf <- fromRepo $ fromTopFilePath topf
|
|
|
|
symlink <- calcRepo $ gitAnnexLink relf k
|
|
|
|
linksha <- hashSymlink symlink
|
2019-02-22 16:41:17 +00:00
|
|
|
return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha
|
2019-02-26 19:25:28 +00:00
|
|
|
|
2019-03-06 22:04:30 +00:00
|
|
|
{- Downloads all new ContentIdentifiers as needed to generate Keys.
|
|
|
|
- Supports concurrency when enabled.
|
2019-02-26 19:25:28 +00:00
|
|
|
-
|
2019-04-10 21:02:56 +00:00
|
|
|
- If any download fails, the whole thing fails with Nothing,
|
|
|
|
- but it will resume where it left off.
|
2019-02-26 19:25:28 +00:00
|
|
|
-}
|
2019-02-27 17:15:02 +00:00
|
|
|
downloadImport :: Remote -> ImportTreeConfig -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (Maybe (ImportableContents Key))
|
|
|
|
downloadImport remote importtreeconfig importablecontents = do
|
2019-02-26 19:25:28 +00:00
|
|
|
-- This map is used to remember content identifiers that
|
|
|
|
-- were just downloaded, before they have necessarily been
|
|
|
|
-- stored in the database. This way, if the same content
|
|
|
|
-- identifier appears multiple times in the
|
|
|
|
-- importablecontents (eg when it has a history),
|
|
|
|
-- they will only be downloaded once.
|
|
|
|
cidmap <- liftIO $ newTVarIO M.empty
|
2019-03-08 16:33:44 +00:00
|
|
|
-- When concurrency is enabled, this set is needed to
|
|
|
|
-- avoid two threads both downloading the same content identifier.
|
|
|
|
downloading <- liftIO $ newTVarIO S.empty
|
2019-03-04 20:47:30 +00:00
|
|
|
withExclusiveLock gitAnnexContentIdentifierLock $
|
2019-03-06 22:04:30 +00:00
|
|
|
bracket CIDDb.openDb CIDDb.closeDb $ \db -> do
|
2019-03-07 16:56:40 +00:00
|
|
|
CIDDb.needsUpdateFromLog db
|
|
|
|
>>= maybe noop (CIDDb.updateFromLog db)
|
2019-04-19 19:05:08 +00:00
|
|
|
go False cidmap downloading importablecontents db
|
2019-02-26 19:25:28 +00:00
|
|
|
where
|
2019-04-19 19:05:08 +00:00
|
|
|
go oldversion cidmap downloading (ImportableContents l h) db = do
|
2019-03-08 16:33:44 +00:00
|
|
|
jobs <- forM l $ \i ->
|
2019-04-19 19:05:08 +00:00
|
|
|
startdownload cidmap downloading db i oldversion
|
2019-03-08 16:33:44 +00:00
|
|
|
l' <- liftIO $ forM jobs $
|
|
|
|
either pure (atomically . takeTMVar)
|
2019-02-26 19:25:28 +00:00
|
|
|
if any isNothing l'
|
|
|
|
then return Nothing
|
|
|
|
else do
|
2019-04-19 19:05:08 +00:00
|
|
|
h' <- mapM (\ic -> go True cidmap downloading ic db) h
|
2019-02-26 19:25:28 +00:00
|
|
|
if any isNothing h'
|
|
|
|
then return Nothing
|
|
|
|
else return $ Just $
|
|
|
|
ImportableContents
|
|
|
|
(catMaybes l')
|
|
|
|
(catMaybes h')
|
|
|
|
|
2019-03-08 16:33:44 +00:00
|
|
|
waitstart downloading cid = liftIO $ atomically $ do
|
|
|
|
s <- readTVar downloading
|
|
|
|
if S.member cid s
|
|
|
|
then retry
|
|
|
|
else writeTVar downloading $ S.insert cid s
|
|
|
|
|
|
|
|
signaldone downloading cid = liftIO $ atomically $ do
|
|
|
|
s <- readTVar downloading
|
|
|
|
writeTVar downloading $ S.delete cid s
|
|
|
|
|
2019-04-19 19:05:08 +00:00
|
|
|
startdownload cidmap downloading db i@(loc, (cid, _sz)) oldversion = getcidkey cidmap db cid >>= \case
|
2019-03-08 16:33:44 +00:00
|
|
|
(k:_) -> return $ Left $ Just (loc, k)
|
|
|
|
[] -> do
|
|
|
|
job <- liftIO $ newEmptyTMVarIO
|
2019-03-08 16:43:03 +00:00
|
|
|
let downloadaction = do
|
2019-03-11 18:46:37 +00:00
|
|
|
showStart ("import " ++ Remote.name remote) (fromImportLocation loc)
|
2019-04-19 19:05:08 +00:00
|
|
|
when oldversion $
|
|
|
|
showNote "old version"
|
2019-03-08 16:33:44 +00:00
|
|
|
next $ tryNonAsync (download cidmap db i) >>= \case
|
|
|
|
Left e -> next $ do
|
|
|
|
warning (show e)
|
|
|
|
liftIO $ atomically $
|
|
|
|
putTMVar job Nothing
|
|
|
|
return False
|
|
|
|
Right r -> next $ do
|
|
|
|
liftIO $ atomically $
|
|
|
|
putTMVar job r
|
|
|
|
return True
|
|
|
|
commandAction $ bracket_
|
|
|
|
(waitstart downloading cid)
|
|
|
|
(signaldone downloading cid)
|
2019-03-08 16:43:03 +00:00
|
|
|
downloadaction
|
2019-03-08 16:33:44 +00:00
|
|
|
return (Right job)
|
|
|
|
|
2019-03-08 16:43:03 +00:00
|
|
|
download cidmap db (loc, (cid, sz)) = do
|
|
|
|
let rundownload tmpfile p =
|
|
|
|
Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (mkkey loc tmpfile) p >>= \case
|
|
|
|
Just k -> tryNonAsync (moveAnnex k tmpfile) >>= \case
|
|
|
|
Right True -> do
|
|
|
|
recordcidkey cidmap db cid k
|
|
|
|
logStatus k InfoPresent
|
|
|
|
logChange k (Remote.uuid remote) InfoPresent
|
|
|
|
return $ Just (loc, k)
|
|
|
|
_ -> return Nothing
|
|
|
|
Nothing -> return Nothing
|
2019-03-08 16:33:44 +00:00
|
|
|
checkDiskSpaceToGet tmpkey Nothing $
|
2019-02-27 17:15:02 +00:00
|
|
|
withTmp tmpkey $ \tmpfile ->
|
2019-03-08 16:43:03 +00:00
|
|
|
metered Nothing tmpkey (return Nothing) $
|
|
|
|
const (rundownload tmpfile)
|
2019-02-27 17:15:02 +00:00
|
|
|
where
|
|
|
|
ia = Remote.importActions remote
|
|
|
|
tmpkey = importKey cid sz
|
2019-02-26 19:25:28 +00:00
|
|
|
|
2019-03-04 17:20:58 +00:00
|
|
|
mkkey loc tmpfile = do
|
2019-02-27 17:15:02 +00:00
|
|
|
f <- fromRepo $ fromTopFilePath $ locworktreefilename loc
|
|
|
|
backend <- chooseBackend f
|
|
|
|
let ks = KeySource
|
|
|
|
{ keyFilename = f
|
|
|
|
, contentLocation = tmpfile
|
|
|
|
, inodeCache = Nothing
|
|
|
|
}
|
2019-03-04 17:20:58 +00:00
|
|
|
fmap fst <$> genKey ks backend
|
2019-02-27 17:15:02 +00:00
|
|
|
|
|
|
|
locworktreefilename loc = asTopFilePath $ case importtreeconfig of
|
|
|
|
ImportTree -> fromImportLocation loc
|
|
|
|
ImportSubTree subdir _ ->
|
|
|
|
getTopFilePath subdir </> fromImportLocation loc
|
2019-02-26 19:25:28 +00:00
|
|
|
|
|
|
|
getcidkey cidmap db cid = liftIO $
|
2019-03-06 22:04:30 +00:00
|
|
|
CIDDb.getContentIdentifierKeys db (Remote.uuid remote) cid >>= \case
|
2019-02-27 17:15:02 +00:00
|
|
|
[] -> atomically $
|
|
|
|
maybeToList . M.lookup cid <$> readTVar cidmap
|
2019-02-26 19:25:28 +00:00
|
|
|
l -> return l
|
|
|
|
|
|
|
|
recordcidkey cidmap db cid k = do
|
|
|
|
liftIO $ atomically $ modifyTVar' cidmap $
|
|
|
|
M.insert cid k
|
2019-03-06 22:04:30 +00:00
|
|
|
liftIO $ CIDDb.recordContentIdentifier db (Remote.uuid remote) cid k
|
|
|
|
CIDLog.recordContentIdentifier (Remote.uuid remote) cid k
|
2019-02-27 17:15:02 +00:00
|
|
|
|
|
|
|
{- Temporary key used for import of a ContentIdentifier while downloading
|
|
|
|
- content, before generating its real key. -}
|
|
|
|
importKey :: ContentIdentifier -> Integer -> Key
|
|
|
|
importKey (ContentIdentifier cid) size = stubKey
|
|
|
|
{ keyName = cid
|
|
|
|
, keyVariety = OtherKey "CID"
|
|
|
|
, keySize = Just size
|
|
|
|
}
|