From f4b773e9a197f4ce7b958d19e1650a865103ca56 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 26 Feb 2019 15:25:28 -0400 Subject: [PATCH] incomplete action to download files from import --- Annex/Import.hs | 83 +++++++++++++++++++++++++++++++++++---- Command/Import.hs | 20 +++++++--- Types/Remote.hs | 3 +- doc/todo/import_tree.mdwn | 7 ++++ 4 files changed, 100 insertions(+), 13 deletions(-) diff --git a/Annex/Import.hs b/Annex/Import.hs index 8d97db7e29..76bd6273fe 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -5,13 +5,16 @@ - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE BangPatterns #-} + module Annex.Import ( RemoteTrackingBranch(..), mkRemoteTrackingBranch, ImportTreeConfig(..), ImportCommitConfig(..), buildImportCommit, - buildImportTrees + buildImportTrees, + downloadImport ) where import Annex.Common @@ -26,8 +29,14 @@ import qualified Git.Branch import qualified Annex import Annex.Link import Annex.LockFile +import Utility.Metered import Logs.Export -import Database.Export +import Logs.ContentIdentifier +import qualified Database.Export as Export +import qualified Database.ContentIdentifier as CID + +import Control.Concurrent.STM +import qualified Data.Map.Strict as M newtype RemoteTrackingBranch = RemoteTrackingBranch { fromRemoteTrackingBranch :: Ref } @@ -120,14 +129,14 @@ buildImportCommit remote importtreeconfig importcommitconfig importable = updateexportdb importedtree = withExclusiveLock (gitAnnexExportLock (Remote.uuid remote)) $ do - db <- openDb (Remote.uuid remote) + db <- Export.openDb (Remote.uuid remote) prevtree <- liftIO $ fromMaybe emptyTree - <$> getExportTreeCurrent db + <$> Export.getExportTreeCurrent db when (importedtree /= prevtree) $ do - updateExportTree db prevtree importedtree - liftIO $ recordExportTreeCurrent db importedtree + Export.updateExportTree db prevtree importedtree + liftIO $ Export.recordExportTreeCurrent db importedtree -- TODO: addExportedLocation etc - liftIO $ flushDbQueue db + Export.closeDb db updateexportlog importedtree = do old <- getExport (Remote.uuid remote) @@ -170,3 +179,63 @@ buildImportTrees basetree msubdir importable = History symlink <- calcRepo $ gitAnnexLink relf k linksha <- hashSymlink symlink return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha + +{- Downloads all new ContentIdentifiers. Supports concurrency when enabled. + - + - If any download fails, the whole thing fails, but it will resume where + - it left off. + -} +downloadImport :: Remote -> ImportableContents ContentIdentifier -> Annex (Maybe (ImportableContents Key)) +downloadImport remote importablecontents = do + -- This map is used to remember content identifiers that + -- were just downloaded, before they have necessarily been + -- stored in the database. This way, if the same content + -- identifier appears multiple times in the + -- importablecontents (eg when it has a history), + -- they will only be downloaded once. + cidmap <- liftIO $ newTVarIO M.empty + bracket CID.openDb CID.closeDb (go cidmap importablecontents) + -- TODO really support concurrency; avoid donwloading the same + -- ContentIdentifier twice. + where + go cidmap (ImportableContents l h) db = do + l' <- mapM (download cidmap db) l + if any isNothing l' + then return Nothing + else do + h' <- mapM (\ic -> go cidmap ic db) h + if any isNothing h' + then return Nothing + else return $ Just $ + ImportableContents + (catMaybes l') + (catMaybes h') + + download cidmap db (loc, cid) = getcidkey cidmap db cid >>= \case + (k:_) -> return $ Just (loc, k) + [] -> do + -- TODO progresss bar + let p = nullMeterUpdate + let ia = Remote.importActions remote + Remote.retrieveExportWithContentIdentifier ia loc cid mkkey p >>= \case + Just k -> do + recordcidkey cidmap db cid k + return $ Just (loc, k) + Nothing -> return Nothing + + mkkey f = error "TODO" + + getcidkey cidmap db cid = liftIO $ + CID.getContentIdentifierKeys db (Remote.uuid remote) cid >>= \case + [] -> atomically $ do + m <- readTVar cidmap + -- force lookup inside STM transaction + let !v = maybeToList $ M.lookup cid m + return v + l -> return l + + recordcidkey cidmap db cid k = do + liftIO $ atomically $ modifyTVar' cidmap $ + M.insert cid k + liftIO $ CID.recordContentIdentifier db (Remote.uuid remote) cid k + recordContentIdentifier (Remote.uuid remote) cid k diff --git a/Command/Import.hs b/Command/Import.hs index e8d4682db2..410e0824aa 100644 --- a/Command/Import.hs +++ b/Command/Import.hs @@ -253,11 +253,7 @@ seekRemote remote branch msubdir = allowConcurrentOutput $ do parentcommit <- frombranch Git.Ref.sha let importcommitconfig = ImportCommitConfig parentcommit ManualCommit importmessage - showStart' "import" (Just (Remote.name remote)) - -- TODO enumerate and download - let importable = ImportableContents [] [] - showEndOk - + importable <- download =<< enumerate void $ includeCommandAction $ commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable where @@ -269,6 +265,20 @@ seekRemote remote branch msubdir = allowConcurrentOutput $ do Just v -> return (Just v) Nothing -> inRepo (a branch) + enumerate = do + showStart' "import" (Just (Remote.name remote)) + Remote.listImportableContents (Remote.importActions remote) >>= \case + Nothing -> do + showEndFail + giveup $ "Unable to list contents of " ++ Remote.name remote + Just importable -> do + showEndOk + return importable + + download importablecontents = downloadImport remote importablecontents >>= \case + Nothing -> giveup $ "Failed to import some files from " ++ Remote.name remote ++ ". Re-run command to resume import." + Just importable -> return importable + commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents Key -> CommandStart commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable = do showStart' "update" (Just $ fromRef $ fromRemoteTrackingBranch tb) diff --git a/Types/Remote.hs b/Types/Remote.hs index 87e1bbd38b..fe7efa7b54 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -258,7 +258,8 @@ data ImportActions a = ImportActions :: ExportLocation -> ContentIdentifier -> (FilePath -> a Key) - -- ^ callback that generates a key from the downloaded content + -- ^ callback that generates a key from the downloaded content, + -- it may rename or delete the file -> MeterUpdate -> a (Maybe Key) -- Exports content to an ExportLocation, and returns the diff --git a/doc/todo/import_tree.mdwn b/doc/todo/import_tree.mdwn index 2efe2be11f..bd304adf35 100644 --- a/doc/todo/import_tree.mdwn +++ b/doc/todo/import_tree.mdwn @@ -10,6 +10,13 @@ this. ## implementation notes +* import can run out of disk space. Should listImportableContents + include the size of the file, so that annex.diskreserve can be checked? + +* Should retrieveExportWithContentIdentifier have a FilePath parameter, + to tell it the file to retrieve to? And if so, would it make sense to + have a ContentLocation -> Key, so it can use gitAnnexTmpObjectLocation? + * "git annex import master --from rmt" followed by "git annex import master:sub --from rmt" first makes the tracking branch contain only what's in the remote, and then grafts what's in the remote into a subdir. Is that the behavior