incomplete action to download files from import

This commit is contained in:
Joey Hess 2019-02-26 15:25:28 -04:00
parent b6e2a5e9c2
commit f4b773e9a1
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 100 additions and 13 deletions

View file

@ -5,13 +5,16 @@
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
{-# LANGUAGE BangPatterns #-}
module Annex.Import ( module Annex.Import (
RemoteTrackingBranch(..), RemoteTrackingBranch(..),
mkRemoteTrackingBranch, mkRemoteTrackingBranch,
ImportTreeConfig(..), ImportTreeConfig(..),
ImportCommitConfig(..), ImportCommitConfig(..),
buildImportCommit, buildImportCommit,
buildImportTrees buildImportTrees,
downloadImport
) where ) where
import Annex.Common import Annex.Common
@ -26,8 +29,14 @@ import qualified Git.Branch
import qualified Annex import qualified Annex
import Annex.Link import Annex.Link
import Annex.LockFile import Annex.LockFile
import Utility.Metered
import Logs.Export import Logs.Export
import Database.Export import Logs.ContentIdentifier
import qualified Database.Export as Export
import qualified Database.ContentIdentifier as CID
import Control.Concurrent.STM
import qualified Data.Map.Strict as M
newtype RemoteTrackingBranch = RemoteTrackingBranch newtype RemoteTrackingBranch = RemoteTrackingBranch
{ fromRemoteTrackingBranch :: Ref } { fromRemoteTrackingBranch :: Ref }
@ -120,14 +129,14 @@ buildImportCommit remote importtreeconfig importcommitconfig importable =
updateexportdb importedtree = updateexportdb importedtree =
withExclusiveLock (gitAnnexExportLock (Remote.uuid remote)) $ do withExclusiveLock (gitAnnexExportLock (Remote.uuid remote)) $ do
db <- openDb (Remote.uuid remote) db <- Export.openDb (Remote.uuid remote)
prevtree <- liftIO $ fromMaybe emptyTree prevtree <- liftIO $ fromMaybe emptyTree
<$> getExportTreeCurrent db <$> Export.getExportTreeCurrent db
when (importedtree /= prevtree) $ do when (importedtree /= prevtree) $ do
updateExportTree db prevtree importedtree Export.updateExportTree db prevtree importedtree
liftIO $ recordExportTreeCurrent db importedtree liftIO $ Export.recordExportTreeCurrent db importedtree
-- TODO: addExportedLocation etc -- TODO: addExportedLocation etc
liftIO $ flushDbQueue db Export.closeDb db
updateexportlog importedtree = do updateexportlog importedtree = do
old <- getExport (Remote.uuid remote) old <- getExport (Remote.uuid remote)
@ -170,3 +179,63 @@ buildImportTrees basetree msubdir importable = History
symlink <- calcRepo $ gitAnnexLink relf k symlink <- calcRepo $ gitAnnexLink relf k
linksha <- hashSymlink symlink linksha <- hashSymlink symlink
return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha
{- Downloads all new ContentIdentifiers. Supports concurrency when enabled.
-
- If any download fails, the whole thing fails, but it will resume where
- it left off.
-}
downloadImport :: Remote -> ImportableContents ContentIdentifier -> Annex (Maybe (ImportableContents Key))
downloadImport remote importablecontents = do
-- This map is used to remember content identifiers that
-- were just downloaded, before they have necessarily been
-- stored in the database. This way, if the same content
-- identifier appears multiple times in the
-- importablecontents (eg when it has a history),
-- they will only be downloaded once.
cidmap <- liftIO $ newTVarIO M.empty
bracket CID.openDb CID.closeDb (go cidmap importablecontents)
-- TODO really support concurrency; avoid donwloading the same
-- ContentIdentifier twice.
where
go cidmap (ImportableContents l h) db = do
l' <- mapM (download cidmap db) l
if any isNothing l'
then return Nothing
else do
h' <- mapM (\ic -> go cidmap ic db) h
if any isNothing h'
then return Nothing
else return $ Just $
ImportableContents
(catMaybes l')
(catMaybes h')
download cidmap db (loc, cid) = getcidkey cidmap db cid >>= \case
(k:_) -> return $ Just (loc, k)
[] -> do
-- TODO progresss bar
let p = nullMeterUpdate
let ia = Remote.importActions remote
Remote.retrieveExportWithContentIdentifier ia loc cid mkkey p >>= \case
Just k -> do
recordcidkey cidmap db cid k
return $ Just (loc, k)
Nothing -> return Nothing
mkkey f = error "TODO"
getcidkey cidmap db cid = liftIO $
CID.getContentIdentifierKeys db (Remote.uuid remote) cid >>= \case
[] -> atomically $ do
m <- readTVar cidmap
-- force lookup inside STM transaction
let !v = maybeToList $ M.lookup cid m
return v
l -> return l
recordcidkey cidmap db cid k = do
liftIO $ atomically $ modifyTVar' cidmap $
M.insert cid k
liftIO $ CID.recordContentIdentifier db (Remote.uuid remote) cid k
recordContentIdentifier (Remote.uuid remote) cid k

View file

@ -253,11 +253,7 @@ seekRemote remote branch msubdir = allowConcurrentOutput $ do
parentcommit <- frombranch Git.Ref.sha parentcommit <- frombranch Git.Ref.sha
let importcommitconfig = ImportCommitConfig parentcommit ManualCommit importmessage let importcommitconfig = ImportCommitConfig parentcommit ManualCommit importmessage
showStart' "import" (Just (Remote.name remote)) importable <- download =<< enumerate
-- TODO enumerate and download
let importable = ImportableContents [] []
showEndOk
void $ includeCommandAction $ void $ includeCommandAction $
commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable
where where
@ -269,6 +265,20 @@ seekRemote remote branch msubdir = allowConcurrentOutput $ do
Just v -> return (Just v) Just v -> return (Just v)
Nothing -> inRepo (a branch) Nothing -> inRepo (a branch)
enumerate = do
showStart' "import" (Just (Remote.name remote))
Remote.listImportableContents (Remote.importActions remote) >>= \case
Nothing -> do
showEndFail
giveup $ "Unable to list contents of " ++ Remote.name remote
Just importable -> do
showEndOk
return importable
download importablecontents = downloadImport remote importablecontents >>= \case
Nothing -> giveup $ "Failed to import some files from " ++ Remote.name remote ++ ". Re-run command to resume import."
Just importable -> return importable
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents Key -> CommandStart commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents Key -> CommandStart
commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable = do commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable = do
showStart' "update" (Just $ fromRef $ fromRemoteTrackingBranch tb) showStart' "update" (Just $ fromRef $ fromRemoteTrackingBranch tb)

View file

@ -258,7 +258,8 @@ data ImportActions a = ImportActions
:: ExportLocation :: ExportLocation
-> ContentIdentifier -> ContentIdentifier
-> (FilePath -> a Key) -> (FilePath -> a Key)
-- ^ callback that generates a key from the downloaded content -- ^ callback that generates a key from the downloaded content,
-- it may rename or delete the file
-> MeterUpdate -> MeterUpdate
-> a (Maybe Key) -> a (Maybe Key)
-- Exports content to an ExportLocation, and returns the -- Exports content to an ExportLocation, and returns the

View file

@ -10,6 +10,13 @@ this.
## implementation notes ## implementation notes
* import can run out of disk space. Should listImportableContents
include the size of the file, so that annex.diskreserve can be checked?
* Should retrieveExportWithContentIdentifier have a FilePath parameter,
to tell it the file to retrieve to? And if so, would it make sense to
have a ContentLocation -> Key, so it can use gitAnnexTmpObjectLocation?
* "git annex import master --from rmt" followed by "git annex import master:sub --from rmt" * "git annex import master --from rmt" followed by "git annex import master:sub --from rmt"
first makes the tracking branch contain only what's in the remote, first makes the tracking branch contain only what's in the remote,
and then grafts what's in the remote into a subdir. Is that the behavior and then grafts what's in the remote into a subdir. Is that the behavior