2019-02-21 21:32:59 +00:00
|
|
|
{- git-annex import from remotes
|
|
|
|
-
|
2020-06-23 20:07:18 +00:00
|
|
|
- Copyright 2019-2020 Joey Hess <id@joeyh.name>
|
2019-02-21 21:32:59 +00:00
|
|
|
-
|
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
2019-02-27 17:15:02 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
2019-02-26 19:25:28 +00:00
|
|
|
|
2019-02-23 19:47:55 +00:00
|
|
|
module Annex.Import (
|
|
|
|
ImportTreeConfig(..),
|
|
|
|
ImportCommitConfig(..),
|
|
|
|
buildImportCommit,
|
2019-02-26 19:25:28 +00:00
|
|
|
buildImportTrees,
|
2020-07-03 17:41:57 +00:00
|
|
|
importKeys,
|
2019-05-21 18:38:00 +00:00
|
|
|
filterImportableContents,
|
|
|
|
makeImportMatcher,
|
2019-06-04 19:14:20 +00:00
|
|
|
listImportableContents,
|
2019-02-23 19:47:55 +00:00
|
|
|
) where
|
2019-02-21 21:32:59 +00:00
|
|
|
|
|
|
|
import Annex.Common
|
|
|
|
import Types.Import
|
2019-02-26 17:11:25 +00:00
|
|
|
import qualified Types.Remote as Remote
|
2019-02-21 21:32:59 +00:00
|
|
|
import Git.Types
|
|
|
|
import Git.Tree
|
2019-02-22 16:41:17 +00:00
|
|
|
import Git.Sha
|
2019-02-21 21:32:59 +00:00
|
|
|
import Git.FilePath
|
2019-04-24 19:13:07 +00:00
|
|
|
import Git.History
|
2019-02-22 16:41:17 +00:00
|
|
|
import qualified Git.Ref
|
|
|
|
import qualified Git.Branch
|
|
|
|
import qualified Annex
|
2019-02-21 21:32:59 +00:00
|
|
|
import Annex.Link
|
2019-02-22 16:41:17 +00:00
|
|
|
import Annex.LockFile
|
2019-02-27 17:15:02 +00:00
|
|
|
import Annex.Content
|
2019-03-01 17:26:15 +00:00
|
|
|
import Annex.Export
|
2019-05-01 17:13:00 +00:00
|
|
|
import Annex.RemoteTrackingBranch
|
2020-06-23 20:07:18 +00:00
|
|
|
import Annex.HashObject
|
2019-03-08 16:33:44 +00:00
|
|
|
import Command
|
2019-02-27 17:15:02 +00:00
|
|
|
import Backend
|
|
|
|
import Types.Key
|
|
|
|
import Types.KeySource
|
2019-03-08 16:43:03 +00:00
|
|
|
import Messages.Progress
|
2019-02-27 17:15:02 +00:00
|
|
|
import Utility.DataUnits
|
2019-06-25 15:37:52 +00:00
|
|
|
import Utility.Metered
|
2019-02-22 16:41:17 +00:00
|
|
|
import Logs.Export
|
2019-02-27 17:58:03 +00:00
|
|
|
import Logs.Location
|
2019-05-21 18:38:00 +00:00
|
|
|
import Logs.PreferredContent
|
|
|
|
import Types.FileMatcher
|
|
|
|
import Annex.FileMatcher
|
|
|
|
import Utility.Matcher (isEmpty)
|
2019-02-26 19:25:28 +00:00
|
|
|
import qualified Database.Export as Export
|
2019-03-06 22:04:30 +00:00
|
|
|
import qualified Database.ContentIdentifier as CIDDb
|
|
|
|
import qualified Logs.ContentIdentifier as CIDLog
|
2020-06-11 20:07:36 +00:00
|
|
|
import Backend.Utilities
|
2019-02-26 19:25:28 +00:00
|
|
|
|
|
|
|
import Control.Concurrent.STM
|
|
|
|
import qualified Data.Map.Strict as M
|
2019-03-08 16:33:44 +00:00
|
|
|
import qualified Data.Set as S
|
2019-06-04 19:14:20 +00:00
|
|
|
import qualified System.FilePath.Posix as Posix
|
2019-12-09 17:49:05 +00:00
|
|
|
import qualified System.FilePath.ByteString as P
|
2019-03-04 20:02:56 +00:00
|
|
|
|
2019-02-23 19:47:55 +00:00
|
|
|
{- Configures how to build an import tree. -}
|
|
|
|
data ImportTreeConfig
|
|
|
|
= ImportTree
|
|
|
|
-- ^ Import the tree as-is from the remote.
|
|
|
|
| ImportSubTree TopFilePath Sha
|
|
|
|
-- ^ Import a tree from the remote and graft it into a subdirectory
|
|
|
|
-- of the existing tree whose Sha is provided, replacing anything
|
|
|
|
-- that was there before.
|
|
|
|
deriving (Show)
|
|
|
|
|
|
|
|
{- Configures how to build an import commit. -}
|
|
|
|
data ImportCommitConfig = ImportCommitConfig
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
{ importCommitTracking :: Maybe Sha
|
|
|
|
-- ^ Current commit on the remote tracking branch.
|
2019-02-23 19:47:55 +00:00
|
|
|
, importCommitMode :: Git.Branch.CommitMode
|
|
|
|
, importCommitMessage :: String
|
|
|
|
}
|
|
|
|
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
{- Buils a commit for an import from a special remote.
|
2019-02-21 21:32:59 +00:00
|
|
|
-
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
- When there are no changes to make (importCommitTracking
|
|
|
|
- already matches what was imported), returns Nothing.
|
2019-02-23 19:47:55 +00:00
|
|
|
-
|
2019-02-21 21:32:59 +00:00
|
|
|
- After importing from a remote, exporting the same thing back to the
|
2019-02-22 16:41:17 +00:00
|
|
|
- remote should be a no-op. So, the export log and database are
|
|
|
|
- updated to reflect the imported tree.
|
2019-02-21 21:32:59 +00:00
|
|
|
-
|
2019-02-23 19:47:55 +00:00
|
|
|
- This does not download any content from a remote. But since it needs the
|
2019-02-22 16:41:17 +00:00
|
|
|
- Key of imported files to be known, its caller will have to first download
|
2019-02-21 21:32:59 +00:00
|
|
|
- new files in order to generate keys for them.
|
|
|
|
-}
|
|
|
|
buildImportCommit
|
2019-02-22 16:41:17 +00:00
|
|
|
:: Remote
|
2019-02-23 19:47:55 +00:00
|
|
|
-> ImportTreeConfig
|
|
|
|
-> ImportCommitConfig
|
2020-06-23 20:07:18 +00:00
|
|
|
-> ImportableContents (Either Sha Key)
|
2019-02-26 17:11:25 +00:00
|
|
|
-> Annex (Maybe Ref)
|
2019-02-23 19:47:55 +00:00
|
|
|
buildImportCommit remote importtreeconfig importcommitconfig importable =
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
case importCommitTracking importcommitconfig of
|
2019-04-24 19:13:07 +00:00
|
|
|
Nothing -> go Nothing
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
Just trackingcommit -> inRepo (Git.Ref.tree trackingcommit) >>= \case
|
2019-04-24 19:13:07 +00:00
|
|
|
Nothing -> go Nothing
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
Just _ -> go (Just trackingcommit)
|
2019-02-21 21:32:59 +00:00
|
|
|
where
|
2019-02-23 19:47:55 +00:00
|
|
|
basetree = case importtreeconfig of
|
|
|
|
ImportTree -> emptyTree
|
|
|
|
ImportSubTree _ sha -> sha
|
|
|
|
subdir = case importtreeconfig of
|
|
|
|
ImportTree -> Nothing
|
|
|
|
ImportSubTree dir _ -> Just dir
|
|
|
|
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
go trackingcommit = do
|
2019-02-23 19:47:55 +00:00
|
|
|
imported@(History finaltree _) <-
|
|
|
|
buildImportTrees basetree subdir importable
|
2019-05-20 20:37:04 +00:00
|
|
|
buildImportCommit' remote importcommitconfig trackingcommit imported >>= \case
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
Just finalcommit -> do
|
2019-03-11 17:44:23 +00:00
|
|
|
updatestate finaltree
|
2019-02-26 17:11:25 +00:00
|
|
|
return (Just finalcommit)
|
2019-04-24 19:13:07 +00:00
|
|
|
Nothing -> return Nothing
|
2019-03-11 17:44:23 +00:00
|
|
|
|
|
|
|
updatestate committedtree = do
|
|
|
|
importedtree <- case subdir of
|
|
|
|
Nothing -> pure committedtree
|
|
|
|
Just dir ->
|
|
|
|
let subtreeref = Ref $
|
2020-04-07 21:41:09 +00:00
|
|
|
fromRef' committedtree
|
|
|
|
<> ":"
|
|
|
|
<> getTopFilePath dir
|
2019-03-11 17:44:23 +00:00
|
|
|
in fromMaybe emptyTree
|
|
|
|
<$> inRepo (Git.Ref.tree subtreeref)
|
|
|
|
updateexportdb importedtree
|
|
|
|
oldexport <- updateexportlog importedtree
|
|
|
|
updatelocationlog oldexport importedtree
|
2019-04-24 19:13:07 +00:00
|
|
|
|
2019-03-07 19:59:44 +00:00
|
|
|
updateexportdb importedtree = do
|
|
|
|
db <- Export.openDb (Remote.uuid remote)
|
|
|
|
Export.writeLockDbWhile db $ do
|
2019-02-22 16:41:17 +00:00
|
|
|
prevtree <- liftIO $ fromMaybe emptyTree
|
2019-02-26 19:25:28 +00:00
|
|
|
<$> Export.getExportTreeCurrent db
|
2019-02-22 16:41:17 +00:00
|
|
|
when (importedtree /= prevtree) $ do
|
2019-02-27 19:29:41 +00:00
|
|
|
Export.updateExportDb db prevtree importedtree
|
2019-02-26 19:25:28 +00:00
|
|
|
liftIO $ Export.recordExportTreeCurrent db importedtree
|
2019-03-07 19:59:44 +00:00
|
|
|
Export.closeDb db
|
2019-02-23 19:47:55 +00:00
|
|
|
|
2019-02-22 16:41:17 +00:00
|
|
|
updateexportlog importedtree = do
|
2019-03-01 17:26:15 +00:00
|
|
|
oldexport <- getExport (Remote.uuid remote)
|
2019-02-26 17:11:25 +00:00
|
|
|
recordExport (Remote.uuid remote) $ ExportChange
|
2019-03-01 17:26:15 +00:00
|
|
|
{ oldTreeish = exportedTreeishes oldexport
|
2019-02-22 16:41:17 +00:00
|
|
|
, newTreeish = importedtree
|
|
|
|
}
|
2019-03-01 17:26:15 +00:00
|
|
|
return oldexport
|
|
|
|
|
|
|
|
-- downloadImport takes care of updating the location log
|
|
|
|
-- for the local repo when keys are downloaded, and also updates
|
|
|
|
-- the location log for the remote for keys that are present in it.
|
|
|
|
-- That leaves updating the location log for the remote for keys
|
|
|
|
-- that have had the last copy of their content removed from it.
|
|
|
|
--
|
|
|
|
-- This must run after the export database has been updated
|
|
|
|
-- and flushed to disk, so it can query it.
|
|
|
|
updatelocationlog oldexport finaltree = do
|
|
|
|
let stillpresent db k = liftIO $ not . null
|
|
|
|
<$> Export.getExportedLocation db k
|
|
|
|
let updater db oldkey _newkey _ = case oldkey of
|
|
|
|
Just (AnnexKey k) -> unlessM (stillpresent db k) $
|
|
|
|
logChange k (Remote.uuid remote) InfoMissing
|
|
|
|
Just (GitKey _) -> noop
|
|
|
|
Nothing -> noop
|
|
|
|
db <- Export.openDb (Remote.uuid remote)
|
|
|
|
forM_ (exportedTreeishes oldexport) $ \oldtree ->
|
|
|
|
Export.runExportDiffUpdater updater db oldtree finaltree
|
|
|
|
Export.closeDb db
|
2019-02-21 21:32:59 +00:00
|
|
|
|
2019-05-20 20:37:04 +00:00
|
|
|
buildImportCommit' :: Remote -> ImportCommitConfig -> Maybe Sha -> History Sha -> Annex (Maybe Sha)
|
|
|
|
buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History ti _) =
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
case mtrackingcommit of
|
2019-05-21 15:32:54 +00:00
|
|
|
Nothing -> Just <$> mkcommitsunconnected imported
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
Just trackingcommit -> do
|
|
|
|
-- Get history of tracking branch to at most
|
2019-05-01 18:20:26 +00:00
|
|
|
-- one more level deep than what was imported,
|
|
|
|
-- so we'll have enough history to compare,
|
|
|
|
-- but not spend too much time getting it.
|
2019-05-21 15:32:54 +00:00
|
|
|
let maxdepth = succ importeddepth
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
inRepo (getHistoryToDepth maxdepth trackingcommit)
|
|
|
|
>>= go trackingcommit
|
2019-04-26 14:17:02 +00:00
|
|
|
where
|
2019-05-21 15:32:54 +00:00
|
|
|
go _ Nothing = Just <$> mkcommitsunconnected imported
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
go trackingcommit (Just h)
|
2019-05-01 16:37:54 +00:00
|
|
|
-- If the tracking branch head is a merge commit
|
|
|
|
-- and one side of the merge matches the history,
|
|
|
|
-- nothing new needs to be committed.
|
2019-05-21 15:32:54 +00:00
|
|
|
| t == ti && any sametodepth (S.toList s) = return Nothing
|
2019-05-01 18:20:26 +00:00
|
|
|
-- If the tracking branch matches the history,
|
|
|
|
-- nothing new needs to be committed.
|
|
|
|
-- (This is unlikely to happen.)
|
2019-05-21 15:32:54 +00:00
|
|
|
| sametodepth h' = return Nothing
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
| otherwise = do
|
2019-05-01 18:20:26 +00:00
|
|
|
importedcommit <- case getRemoteTrackingBranchImportHistory h of
|
2019-05-21 15:32:54 +00:00
|
|
|
Nothing -> mkcommitsunconnected imported
|
|
|
|
Just oldimported@(History oldhc _)
|
|
|
|
| importeddepth == 1 ->
|
|
|
|
mkcommitconnected imported oldimported
|
|
|
|
| otherwise -> do
|
|
|
|
let oldimportedtrees = mapHistory historyCommitTree oldimported
|
|
|
|
mknewcommits oldhc oldimportedtrees imported
|
2019-05-21 18:38:00 +00:00
|
|
|
ti' <- addBackExportExcluded remote ti
|
2019-05-01 17:13:00 +00:00
|
|
|
Just <$> makeRemoteTrackingBranchMergeCommit'
|
2019-05-20 20:37:04 +00:00
|
|
|
trackingcommit importedcommit ti'
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
where
|
|
|
|
h'@(History t s) = mapHistory historyCommitTree h
|
2019-04-23 20:34:19 +00:00
|
|
|
|
2019-05-21 15:32:54 +00:00
|
|
|
importeddepth = historyDepth imported
|
|
|
|
|
|
|
|
sametodepth b = imported == truncateHistoryToDepth importeddepth b
|
2019-04-23 20:34:19 +00:00
|
|
|
|
make import tree from remote generate a merge commit
This way no history is lost, neither what was exported to the remote,
or the history of changes that is imported from it. No complicated
correlation of two possibly very different histories is needed, just
record what we know and then git merge will do a good job.
Also, it notices when the remote tracking branch doesn't need to be updated,
and avoids doing anything, so noop remotes are super cheap.
The only catch here is that, since the commits generated for imports
from the remote don't have a stable date or author/committer, each
(non-noop) import generates different commits for the same imported
trees. So, when the imported remote tracking branch is merged into master
and then a change is imported again, there will be an extra series of
commits, which will get more and more expensive each time.
This seems to call for making stable commits for imports. Also that
seems a good idea to make importing in several repositories have the
same result.
2019-04-30 20:13:21 +00:00
|
|
|
mkcommit parents tree = inRepo $ Git.Branch.commitTree
|
|
|
|
(importCommitMode importcommitconfig)
|
|
|
|
(importCommitMessage importcommitconfig)
|
|
|
|
parents
|
|
|
|
tree
|
|
|
|
|
2019-05-21 15:32:54 +00:00
|
|
|
-- Start a new history of import commits, not connected to any
|
|
|
|
-- prior import commits.
|
|
|
|
mkcommitsunconnected (History importedtree hs) = do
|
|
|
|
parents <- mapM mkcommitsunconnected (S.toList hs)
|
|
|
|
mkcommit parents importedtree
|
|
|
|
|
|
|
|
-- Commit the new history connected with the old history.
|
|
|
|
-- Used when the import is not versioned, so the history depth is 1.
|
|
|
|
mkcommitconnected (History importedtree _) (History oldhc _) = do
|
|
|
|
let parents = [historyCommit oldhc]
|
2019-05-01 18:20:26 +00:00
|
|
|
mkcommit parents importedtree
|
|
|
|
|
2019-05-01 19:34:07 +00:00
|
|
|
-- Reuse the commits from the old imported History when possible.
|
|
|
|
mknewcommits oldhc old new@(History importedtree hs)
|
|
|
|
| old == new = return $ historyCommit oldhc
|
2019-05-01 18:20:26 +00:00
|
|
|
| otherwise = do
|
2019-05-01 19:34:07 +00:00
|
|
|
parents <- mapM (mknewcommits oldhc old) (S.toList hs)
|
2019-05-01 18:20:26 +00:00
|
|
|
mkcommit parents importedtree
|
|
|
|
|
2019-02-22 16:41:17 +00:00
|
|
|
{- Builds a history of git trees reflecting the ImportableContents.
|
|
|
|
-
|
|
|
|
- When a subdir is provided, imported tree is grafted into the basetree at
|
|
|
|
- that location, replacing any object that was there.
|
|
|
|
-}
|
2019-02-21 21:32:59 +00:00
|
|
|
buildImportTrees
|
2019-02-22 16:41:17 +00:00
|
|
|
:: Ref
|
|
|
|
-> Maybe TopFilePath
|
2020-06-23 20:07:18 +00:00
|
|
|
-> ImportableContents (Either Sha Key)
|
2019-02-21 21:32:59 +00:00
|
|
|
-> Annex (History Sha)
|
2019-02-22 16:41:17 +00:00
|
|
|
buildImportTrees basetree msubdir importable = History
|
2019-04-23 19:08:37 +00:00
|
|
|
<$> (buildtree (importableContents importable) =<< Annex.gitRepo)
|
|
|
|
<*> buildhistory
|
2019-02-21 21:32:59 +00:00
|
|
|
where
|
2019-04-23 19:08:37 +00:00
|
|
|
buildhistory = S.fromList
|
|
|
|
<$> mapM (buildImportTrees basetree msubdir)
|
|
|
|
(importableHistory importable)
|
|
|
|
|
|
|
|
buildtree ls repo = withMkTreeHandle repo $ \hdl -> do
|
2019-02-22 16:41:17 +00:00
|
|
|
importtree <- liftIO . recordTree' hdl
|
|
|
|
. treeItemsToTree
|
|
|
|
=<< mapM mktreeitem ls
|
|
|
|
case msubdir of
|
|
|
|
Nothing -> return importtree
|
|
|
|
Just subdir -> liftIO $
|
|
|
|
graftTree' importtree subdir basetree repo hdl
|
2019-04-23 19:08:37 +00:00
|
|
|
|
2020-06-23 20:07:18 +00:00
|
|
|
mktreeitem (loc, v) = case v of
|
|
|
|
Right k -> do
|
|
|
|
relf <- fromRepo $ fromTopFilePath topf
|
|
|
|
symlink <- calcRepo $ gitAnnexLink (fromRawFilePath relf) k
|
|
|
|
linksha <- hashSymlink symlink
|
|
|
|
return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha
|
|
|
|
Left sha ->
|
|
|
|
return $ TreeItem treepath (fromTreeItemType TreeFile) sha
|
|
|
|
where
|
|
|
|
lf = fromImportLocation loc
|
|
|
|
treepath = asTopFilePath lf
|
|
|
|
topf = asTopFilePath $
|
2019-12-09 17:49:05 +00:00
|
|
|
maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir
|
2019-02-26 19:25:28 +00:00
|
|
|
|
2020-07-03 17:41:57 +00:00
|
|
|
{- Downloads all new ContentIdentifiers, or when importcontent is False,
|
|
|
|
- generates Keys without downloading.
|
2019-02-26 19:25:28 +00:00
|
|
|
-
|
2020-07-03 17:41:57 +00:00
|
|
|
- Generates either a Key or a git Sha, depending on annex.largefiles.
|
|
|
|
- But when importcontent is False, it cannot match on annex.largefiles
|
|
|
|
- (or generate a git Sha), so always generates Keys.
|
|
|
|
-
|
|
|
|
- Supports concurrency when enabled.
|
|
|
|
-
|
|
|
|
- If it fails on any file, the whole thing fails with Nothing,
|
2019-04-10 21:02:56 +00:00
|
|
|
- but it will resume where it left off.
|
2020-06-23 20:07:18 +00:00
|
|
|
-
|
2020-07-03 17:41:57 +00:00
|
|
|
- Note that, when a ContentIdentifier has been imported before,
|
|
|
|
- generates the same thing that was imported before, so annex.largefiles
|
|
|
|
- is not reapplied.
|
2019-02-26 19:25:28 +00:00
|
|
|
-}
|
2020-07-03 17:41:57 +00:00
|
|
|
importKeys
|
|
|
|
:: Remote
|
|
|
|
-> ImportTreeConfig
|
|
|
|
-> Bool
|
|
|
|
-> ImportableContents (ContentIdentifier, ByteSize)
|
|
|
|
-> Annex (Maybe (ImportableContents (Either Sha Key)))
|
|
|
|
importKeys remote importtreeconfig importcontent importablecontents = do
|
|
|
|
when (not importcontent && isNothing (Remote.importKey ia)) $
|
|
|
|
giveup "This remote does not support importing without downloading content."
|
2019-02-26 19:25:28 +00:00
|
|
|
-- This map is used to remember content identifiers that
|
2020-07-03 17:41:57 +00:00
|
|
|
-- were just imported, before they have necessarily been
|
2019-02-26 19:25:28 +00:00
|
|
|
-- stored in the database. This way, if the same content
|
|
|
|
-- identifier appears multiple times in the
|
|
|
|
-- importablecontents (eg when it has a history),
|
2020-07-03 17:41:57 +00:00
|
|
|
-- they will only be imported once.
|
2019-02-26 19:25:28 +00:00
|
|
|
cidmap <- liftIO $ newTVarIO M.empty
|
2019-03-08 16:33:44 +00:00
|
|
|
-- When concurrency is enabled, this set is needed to
|
2020-07-03 17:41:57 +00:00
|
|
|
-- avoid two threads both importing the same content identifier.
|
|
|
|
importing <- liftIO $ newTVarIO S.empty
|
2019-03-04 20:47:30 +00:00
|
|
|
withExclusiveLock gitAnnexContentIdentifierLock $
|
2019-03-06 22:04:30 +00:00
|
|
|
bracket CIDDb.openDb CIDDb.closeDb $ \db -> do
|
2019-03-07 16:56:40 +00:00
|
|
|
CIDDb.needsUpdateFromLog db
|
|
|
|
>>= maybe noop (CIDDb.updateFromLog db)
|
2020-07-03 17:41:57 +00:00
|
|
|
go False cidmap importing importablecontents db
|
2019-02-26 19:25:28 +00:00
|
|
|
where
|
2020-07-03 17:41:57 +00:00
|
|
|
go oldversion cidmap importing (ImportableContents l h) db = do
|
2020-06-23 20:07:18 +00:00
|
|
|
largematcher <- largeFilesMatcher
|
2019-03-08 16:33:44 +00:00
|
|
|
jobs <- forM l $ \i ->
|
2020-07-03 17:41:57 +00:00
|
|
|
startimport cidmap importing db i oldversion largematcher
|
2019-03-08 16:33:44 +00:00
|
|
|
l' <- liftIO $ forM jobs $
|
|
|
|
either pure (atomically . takeTMVar)
|
2019-02-26 19:25:28 +00:00
|
|
|
if any isNothing l'
|
|
|
|
then return Nothing
|
|
|
|
else do
|
2020-07-03 17:41:57 +00:00
|
|
|
h' <- mapM (\ic -> go True cidmap importing ic db) h
|
2019-02-26 19:25:28 +00:00
|
|
|
if any isNothing h'
|
|
|
|
then return Nothing
|
|
|
|
else return $ Just $
|
|
|
|
ImportableContents
|
|
|
|
(catMaybes l')
|
|
|
|
(catMaybes h')
|
|
|
|
|
2020-07-03 17:41:57 +00:00
|
|
|
waitstart importing cid = liftIO $ atomically $ do
|
|
|
|
s <- readTVar importing
|
2019-03-08 16:33:44 +00:00
|
|
|
if S.member cid s
|
|
|
|
then retry
|
2020-07-03 17:41:57 +00:00
|
|
|
else writeTVar importing $ S.insert cid s
|
2019-03-08 16:33:44 +00:00
|
|
|
|
2020-07-03 17:41:57 +00:00
|
|
|
signaldone importing cid = liftIO $ atomically $ do
|
|
|
|
s <- readTVar importing
|
|
|
|
writeTVar importing $ S.delete cid s
|
2019-03-08 16:33:44 +00:00
|
|
|
|
2020-07-03 17:41:57 +00:00
|
|
|
startimport cidmap importing db i@(loc, (cid, _sz)) oldversion largematcher = getcidkey cidmap db cid >>= \case
|
2020-06-23 20:07:18 +00:00
|
|
|
(k:ks) ->
|
|
|
|
-- If the same content was imported before
|
|
|
|
-- yeilding multiple different keys, it's not clear
|
|
|
|
-- which is best to use this time, so pick the
|
|
|
|
-- first in the list. But, if any of them is a
|
|
|
|
-- git sha, use it, because the content must
|
|
|
|
-- be included in the git repo then.
|
|
|
|
let v = case mapMaybe keyGitSha (k:ks) of
|
|
|
|
(sha:_) -> Left sha
|
|
|
|
[] -> Right k
|
|
|
|
in return $ Left $ Just (loc, v)
|
2019-03-08 16:33:44 +00:00
|
|
|
[] -> do
|
|
|
|
job <- liftIO $ newEmptyTMVarIO
|
2019-12-04 17:15:34 +00:00
|
|
|
let ai = ActionItemOther (Just (fromRawFilePath (fromImportLocation loc)))
|
2020-07-03 17:41:57 +00:00
|
|
|
let importaction = starting ("import " ++ Remote.name remote) ai $ do
|
2019-04-19 19:05:08 +00:00
|
|
|
when oldversion $
|
|
|
|
showNote "old version"
|
2020-07-03 17:41:57 +00:00
|
|
|
tryNonAsync (importordownload cidmap db i largematcher) >>= \case
|
2019-03-08 16:33:44 +00:00
|
|
|
Left e -> next $ do
|
|
|
|
warning (show e)
|
|
|
|
liftIO $ atomically $
|
|
|
|
putTMVar job Nothing
|
|
|
|
return False
|
|
|
|
Right r -> next $ do
|
|
|
|
liftIO $ atomically $
|
|
|
|
putTMVar job r
|
|
|
|
return True
|
|
|
|
commandAction $ bracket_
|
2020-07-03 17:41:57 +00:00
|
|
|
(waitstart importing cid)
|
|
|
|
(signaldone importing cid)
|
|
|
|
importaction
|
2019-03-08 16:33:44 +00:00
|
|
|
return (Right job)
|
|
|
|
|
2020-07-03 17:41:57 +00:00
|
|
|
importordownload
|
|
|
|
| not importcontent = doimport
|
|
|
|
| otherwise = dodownload
|
|
|
|
|
|
|
|
doimport cidmap db (loc, (cid, sz)) _largematcher =
|
|
|
|
case Remote.importKey ia of
|
|
|
|
Nothing -> error "internal" -- checked earlier
|
|
|
|
Just a -> do
|
|
|
|
let importer p = do
|
2020-07-03 18:22:22 +00:00
|
|
|
unsizedk <- a loc cid p
|
|
|
|
-- This avoids every remote needing
|
|
|
|
-- to add the size.
|
|
|
|
let k = alterKey unsizedk $ \kd -> kd
|
|
|
|
{ keySize = keySize kd <|> Just sz }
|
2020-07-03 17:41:57 +00:00
|
|
|
checkSecureHashes k >>= \case
|
|
|
|
Nothing -> do
|
|
|
|
recordcidkey cidmap db cid k
|
|
|
|
logChange k (Remote.uuid remote) InfoPresent
|
|
|
|
return (Right k)
|
|
|
|
Just msg -> giveup (msg ++ " to import")
|
|
|
|
let runimport p = tryNonAsync (importer p) >>= \case
|
|
|
|
Right k -> return $ Just (loc, k)
|
|
|
|
Left e -> do
|
|
|
|
warning (show e)
|
|
|
|
return Nothing
|
|
|
|
metered Nothing sz $
|
|
|
|
const runimport
|
|
|
|
|
|
|
|
dodownload cidmap db (loc, (cid, sz)) largematcher = do
|
2020-05-15 16:51:09 +00:00
|
|
|
let downloader tmpfile p = do
|
2020-06-23 20:07:18 +00:00
|
|
|
k <- Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (mkkey loc tmpfile largematcher) p
|
|
|
|
case keyGitSha k of
|
|
|
|
Nothing -> do
|
|
|
|
ok <- moveAnnex k tmpfile
|
|
|
|
when ok $ do
|
|
|
|
recordcidkey cidmap db cid k
|
|
|
|
logStatus k InfoPresent
|
|
|
|
logChange k (Remote.uuid remote) InfoPresent
|
|
|
|
return (Right k, ok)
|
|
|
|
Just sha -> do
|
|
|
|
recordcidkey cidmap db cid k
|
|
|
|
return (Left sha, True)
|
2020-05-15 16:51:09 +00:00
|
|
|
let rundownload tmpfile p = tryNonAsync (downloader tmpfile p) >>= \case
|
2020-06-23 20:07:18 +00:00
|
|
|
Right (v, True) -> return $ Just (loc, v)
|
2020-05-15 16:51:09 +00:00
|
|
|
Right (_, False) -> return Nothing
|
|
|
|
Left e -> do
|
|
|
|
warning (show e)
|
|
|
|
return Nothing
|
2019-03-08 16:33:44 +00:00
|
|
|
checkDiskSpaceToGet tmpkey Nothing $
|
2019-02-27 17:15:02 +00:00
|
|
|
withTmp tmpkey $ \tmpfile ->
|
2019-06-25 16:30:18 +00:00
|
|
|
metered Nothing tmpkey $
|
2019-03-08 16:43:03 +00:00
|
|
|
const (rundownload tmpfile)
|
2019-02-27 17:15:02 +00:00
|
|
|
where
|
|
|
|
tmpkey = importKey cid sz
|
2020-07-03 17:41:57 +00:00
|
|
|
|
|
|
|
ia = Remote.importActions remote
|
2019-02-26 19:25:28 +00:00
|
|
|
|
2020-06-23 20:07:18 +00:00
|
|
|
mkkey loc tmpfile largematcher = do
|
2019-02-27 17:15:02 +00:00
|
|
|
f <- fromRepo $ fromTopFilePath $ locworktreefilename loc
|
2020-06-23 20:07:18 +00:00
|
|
|
matcher <- largematcher (fromRawFilePath f)
|
|
|
|
let mi = MatchingFile FileInfo
|
|
|
|
{ matchFile = f
|
|
|
|
, currFile = toRawFilePath tmpfile
|
2019-02-27 17:15:02 +00:00
|
|
|
}
|
2020-06-23 20:07:18 +00:00
|
|
|
islargefile <- checkMatcher' matcher mi mempty
|
|
|
|
if islargefile
|
|
|
|
then do
|
|
|
|
backend <- chooseBackend (fromRawFilePath f)
|
|
|
|
let ks = KeySource
|
|
|
|
{ keyFilename = f
|
|
|
|
, contentLocation = toRawFilePath tmpfile
|
|
|
|
, inodeCache = Nothing
|
|
|
|
}
|
|
|
|
fst <$> genKey ks nullMeterUpdate backend
|
|
|
|
else gitShaKey <$> hashFile tmpfile
|
2019-02-27 17:15:02 +00:00
|
|
|
|
|
|
|
locworktreefilename loc = asTopFilePath $ case importtreeconfig of
|
2019-12-09 17:49:05 +00:00
|
|
|
ImportTree -> fromImportLocation loc
|
2019-02-27 17:15:02 +00:00
|
|
|
ImportSubTree subdir _ ->
|
2019-12-09 17:49:05 +00:00
|
|
|
getTopFilePath subdir P.</> fromImportLocation loc
|
2019-02-26 19:25:28 +00:00
|
|
|
|
|
|
|
getcidkey cidmap db cid = liftIO $
|
add RemoteStateHandle
This solves the problem of sameas remotes trampling over per-remote
state. Used for:
* per-remote state, of course
* per-remote metadata, also of course
* per-remote content identifiers, because two remote implementations
could in theory generate the same content identifier for two different
peices of content
While chunk logs are per-remote data, they don't use this, because the
number and size of chunks stored is a common property across sameas
remotes.
External special remote had a complication, where it was theoretically
possible for a remote to send SETSTATE or GETSTATE during INITREMOTE or
EXPORTSUPPORTED. Since the uuid of the remote is typically generate in
Remote.setup, it would only be possible to pass a Maybe
RemoteStateHandle into it, and it would otherwise have to construct its
own. Rather than go that route, I decided to send an ERROR in this case.
It seems unlikely that any existing external special remote will be
affected. They would have to make up a git-annex key, and set state for
some reason during INITREMOTE. I can imagine such a hack, but it doesn't
seem worth complicating the code in such an ugly way to support it.
Unfortunately, both TestRemote and Annex.Import needed the Remote
to have a new field added that holds its RemoteStateHandle.
2019-10-14 16:33:27 +00:00
|
|
|
CIDDb.getContentIdentifierKeys db rs cid >>= \case
|
2019-02-27 17:15:02 +00:00
|
|
|
[] -> atomically $
|
|
|
|
maybeToList . M.lookup cid <$> readTVar cidmap
|
2019-02-26 19:25:28 +00:00
|
|
|
l -> return l
|
|
|
|
|
|
|
|
recordcidkey cidmap db cid k = do
|
|
|
|
liftIO $ atomically $ modifyTVar' cidmap $
|
|
|
|
M.insert cid k
|
add RemoteStateHandle
This solves the problem of sameas remotes trampling over per-remote
state. Used for:
* per-remote state, of course
* per-remote metadata, also of course
* per-remote content identifiers, because two remote implementations
could in theory generate the same content identifier for two different
peices of content
While chunk logs are per-remote data, they don't use this, because the
number and size of chunks stored is a common property across sameas
remotes.
External special remote had a complication, where it was theoretically
possible for a remote to send SETSTATE or GETSTATE during INITREMOTE or
EXPORTSUPPORTED. Since the uuid of the remote is typically generate in
Remote.setup, it would only be possible to pass a Maybe
RemoteStateHandle into it, and it would otherwise have to construct its
own. Rather than go that route, I decided to send an ERROR in this case.
It seems unlikely that any existing external special remote will be
affected. They would have to make up a git-annex key, and set state for
some reason during INITREMOTE. I can imagine such a hack, but it doesn't
seem worth complicating the code in such an ugly way to support it.
Unfortunately, both TestRemote and Annex.Import needed the Remote
to have a new field added that holds its RemoteStateHandle.
2019-10-14 16:33:27 +00:00
|
|
|
liftIO $ CIDDb.recordContentIdentifier db rs cid k
|
|
|
|
CIDLog.recordContentIdentifier rs cid k
|
|
|
|
|
|
|
|
rs = Remote.remoteStateHandle remote
|
2019-02-27 17:15:02 +00:00
|
|
|
|
|
|
|
{- Temporary key used for import of a ContentIdentifier while downloading
|
|
|
|
- content, before generating its real key. -}
|
|
|
|
importKey :: ContentIdentifier -> Integer -> Key
|
2019-11-22 20:24:04 +00:00
|
|
|
importKey (ContentIdentifier cid) size = mkKey $ \k -> k
|
2020-06-11 20:07:36 +00:00
|
|
|
{ keyName = genKeyName (decodeBS cid)
|
2019-02-27 17:15:02 +00:00
|
|
|
, keyVariety = OtherKey "CID"
|
|
|
|
, keySize = Just size
|
|
|
|
}
|
2019-05-20 20:37:04 +00:00
|
|
|
|
|
|
|
{-- Export omits non-preferred content from the tree stored on the
|
|
|
|
-- remote. So the import will normally have that content
|
|
|
|
-- omitted (unless something else added files with the same names to the
|
|
|
|
-- special remote).
|
|
|
|
--
|
|
|
|
-- That presents a problem: Merging the imported tree would result
|
2019-05-21 18:38:00 +00:00
|
|
|
-- in deletion of the files that were excluded from export.
|
|
|
|
-- To avoid that happening, this adds them back to the imported tree.
|
2019-05-20 20:37:04 +00:00
|
|
|
--}
|
2019-05-21 18:38:00 +00:00
|
|
|
addBackExportExcluded :: Remote -> Sha -> Annex Sha
|
|
|
|
addBackExportExcluded remote importtree =
|
2019-05-20 20:37:04 +00:00
|
|
|
getExportExcluded (Remote.uuid remote) >>= \case
|
|
|
|
[] -> return importtree
|
|
|
|
excludedlist -> inRepo $
|
|
|
|
adjustTree
|
|
|
|
-- don't remove any
|
|
|
|
(pure . Just)
|
|
|
|
excludedlist
|
|
|
|
-- if something was imported with the same
|
|
|
|
-- name as a file that was previously
|
|
|
|
-- excluded from import, use what was imported
|
|
|
|
(\imported _excluded -> imported)
|
|
|
|
[]
|
|
|
|
importtree
|
2019-05-21 18:38:00 +00:00
|
|
|
|
|
|
|
{- Match the preferred content of the remote at import time.
|
|
|
|
-
|
|
|
|
- Only keyless tokens are supported, because the keys are not known
|
|
|
|
- until an imported file is downloaded, which is too late to bother
|
|
|
|
- excluding it from an import.
|
|
|
|
-}
|
|
|
|
makeImportMatcher :: Remote -> Annex (Either String (FileMatcher Annex))
|
|
|
|
makeImportMatcher r = load preferredContentKeylessTokens >>= \case
|
|
|
|
Nothing -> return $ Right matchAll
|
|
|
|
Just (Right v) -> return $ Right v
|
|
|
|
Just (Left err) -> load preferredContentTokens >>= \case
|
|
|
|
Just (Left err') -> return $ Left err'
|
|
|
|
_ -> return $ Left $
|
|
|
|
"The preferred content expression contains terms that cannot be checked when importing: " ++ err
|
|
|
|
where
|
|
|
|
load t = M.lookup (Remote.uuid r) . fst <$> preferredRequiredMapsLoad' t
|
|
|
|
|
|
|
|
wantImport :: FileMatcher Annex -> ImportLocation -> ByteSize -> Annex Bool
|
|
|
|
wantImport matcher loc sz = checkMatcher' matcher mi mempty
|
|
|
|
where
|
|
|
|
mi = MatchingInfo $ ProvidedInfo
|
2019-12-04 17:15:34 +00:00
|
|
|
{ providedFilePath = Right $ fromRawFilePath $ fromImportLocation loc
|
2019-05-21 18:38:00 +00:00
|
|
|
, providedKey = unavail "key"
|
|
|
|
, providedFileSize = Right sz
|
|
|
|
, providedMimeType = unavail "mime"
|
|
|
|
, providedMimeEncoding = unavail "mime"
|
|
|
|
}
|
|
|
|
-- This should never run, as long as the FileMatcher was generated
|
|
|
|
-- using the preferredContentKeylessTokens.
|
|
|
|
unavail v = Left $ error $ "Internal error: unavailable " ++ v
|
|
|
|
|
|
|
|
{- If a file is not preferred content, but it was previously exported or
|
|
|
|
- imported to the remote, not importing it would result in a remote
|
|
|
|
- tracking branch that, when merged, would delete the file.
|
|
|
|
-
|
|
|
|
- To avoid that problem, such files are included in the import.
|
|
|
|
- The next export will remove them from the remote.
|
|
|
|
-}
|
|
|
|
shouldImport :: Export.ExportHandle -> FileMatcher Annex -> ImportLocation -> ByteSize -> Annex Bool
|
|
|
|
shouldImport dbhandle matcher loc sz =
|
|
|
|
wantImport matcher loc sz
|
|
|
|
<||>
|
|
|
|
liftIO (not . null <$> Export.getExportTreeKey dbhandle loc)
|
|
|
|
|
|
|
|
filterImportableContents :: Remote -> FileMatcher Annex -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (ImportableContents (ContentIdentifier, ByteSize))
|
|
|
|
filterImportableContents r matcher importable
|
|
|
|
| isEmpty matcher = return importable
|
|
|
|
| otherwise = do
|
|
|
|
dbhandle <- Export.openDb (Remote.uuid r)
|
|
|
|
go dbhandle importable
|
|
|
|
where
|
|
|
|
go dbhandle ic = ImportableContents
|
|
|
|
<$> filterM (match dbhandle) (importableContents ic)
|
|
|
|
<*> mapM (go dbhandle) (importableHistory ic)
|
|
|
|
|
|
|
|
match dbhandle (loc, (_cid, sz)) = shouldImport dbhandle matcher loc sz
|
2019-06-04 19:14:20 +00:00
|
|
|
|
|
|
|
{- Gets the ImportableContents from the remote.
|
|
|
|
-
|
|
|
|
- Filters out any paths that include a ".git" component, because git does
|
|
|
|
- not allow storing ".git" in a git repository. While it is possible to
|
|
|
|
- write a git tree that contains that, git will complain and refuse to
|
|
|
|
- check it out.
|
|
|
|
-}
|
|
|
|
listImportableContents :: Remote -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
|
|
|
listImportableContents r = fmap removegitspecial
|
|
|
|
<$> Remote.listImportableContents (Remote.importActions r)
|
|
|
|
where
|
|
|
|
removegitspecial ic = ImportableContents
|
|
|
|
{ importableContents =
|
|
|
|
filter (not . gitspecial . fst) (importableContents ic)
|
|
|
|
, importableHistory =
|
|
|
|
map removegitspecial (importableHistory ic)
|
|
|
|
}
|
2019-12-04 17:15:34 +00:00
|
|
|
gitspecial l = ".git" `elem` Posix.splitDirectories (fromRawFilePath (fromImportLocation l))
|