import downloader complete (untested)

Made some api changes.

listImportableContents needs to provide the size
of the data, so the downloader can check disk free space.

retrieveExportWithContentIdentifier is passed the filepath to write to

Use temporary "CID" key during download of a ContentIdentifier from a
remote, so withTmp can be used and then move the content to the real key
once it's known.
This commit is contained in:
Joey Hess 2019-02-27 13:15:02 -04:00
parent f4b773e9a1
commit 45aacd888b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 64 additions and 36 deletions

View file

@ -5,7 +5,7 @@
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
{-# LANGUAGE BangPatterns #-} {-# LANGUAGE OverloadedStrings #-}
module Annex.Import ( module Annex.Import (
RemoteTrackingBranch(..), RemoteTrackingBranch(..),
@ -29,7 +29,12 @@ import qualified Git.Branch
import qualified Annex import qualified Annex
import Annex.Link import Annex.Link
import Annex.LockFile import Annex.LockFile
import Annex.Content
import Backend
import Types.Key
import Types.KeySource
import Utility.Metered import Utility.Metered
import Utility.DataUnits
import Logs.Export import Logs.Export
import Logs.ContentIdentifier import Logs.ContentIdentifier
import qualified Database.Export as Export import qualified Database.Export as Export
@ -185,8 +190,8 @@ buildImportTrees basetree msubdir importable = History
- If any download fails, the whole thing fails, but it will resume where - If any download fails, the whole thing fails, but it will resume where
- it left off. - it left off.
-} -}
downloadImport :: Remote -> ImportableContents ContentIdentifier -> Annex (Maybe (ImportableContents Key)) downloadImport :: Remote -> ImportTreeConfig -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (Maybe (ImportableContents Key))
downloadImport remote importablecontents = do downloadImport remote importtreeconfig importablecontents = do
-- This map is used to remember content identifiers that -- This map is used to remember content identifiers that
-- were just downloaded, before they have necessarily been -- were just downloaded, before they have necessarily been
-- stored in the database. This way, if the same content -- stored in the database. This way, if the same content
@ -211,27 +216,45 @@ downloadImport remote importablecontents = do
(catMaybes l') (catMaybes l')
(catMaybes h') (catMaybes h')
download cidmap db (loc, cid) = getcidkey cidmap db cid >>= \case download cidmap db (loc, (cid, sz)) = getcidkey cidmap db cid >>= \case
(k:_) -> return $ Just (loc, k) (k:_) -> return $ Just (loc, k)
[] -> do [] -> checkDiskSpaceToGet tmpkey Nothing $
-- TODO progresss bar withTmp tmpkey $ \tmpfile ->
let p = nullMeterUpdate Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (ingestkey loc tmpfile) p >>= \case
let ia = Remote.importActions remote Just k -> do
Remote.retrieveExportWithContentIdentifier ia loc cid mkkey p >>= \case recordcidkey cidmap db cid k
Just k -> do return $ Just (loc, k)
recordcidkey cidmap db cid k Nothing -> return Nothing
return $ Just (loc, k) where
Nothing -> return Nothing -- TODO progress bar
p = nullMeterUpdate
ia = Remote.importActions remote
tmpkey = importKey cid sz
mkkey f = error "TODO" ingestkey loc tmpfile = do
f <- fromRepo $ fromTopFilePath $ locworktreefilename loc
backend <- chooseBackend f
let ks = KeySource
{ keyFilename = f
, contentLocation = tmpfile
, inodeCache = Nothing
}
genKey ks backend >>= \case
Nothing -> return Nothing
Just (k, _) ->
tryNonAsync (moveAnnex k tmpfile) >>= \case
Right True -> return (Just k)
_ -> return Nothing
locworktreefilename loc = asTopFilePath $ case importtreeconfig of
ImportTree -> fromImportLocation loc
ImportSubTree subdir _ ->
getTopFilePath subdir </> fromImportLocation loc
getcidkey cidmap db cid = liftIO $ getcidkey cidmap db cid = liftIO $
CID.getContentIdentifierKeys db (Remote.uuid remote) cid >>= \case CID.getContentIdentifierKeys db (Remote.uuid remote) cid >>= \case
[] -> atomically $ do [] -> atomically $
m <- readTVar cidmap maybeToList . M.lookup cid <$> readTVar cidmap
-- force lookup inside STM transaction
let !v = maybeToList $ M.lookup cid m
return v
l -> return l l -> return l
recordcidkey cidmap db cid k = do recordcidkey cidmap db cid k = do
@ -239,3 +262,12 @@ downloadImport remote importablecontents = do
M.insert cid k M.insert cid k
liftIO $ CID.recordContentIdentifier db (Remote.uuid remote) cid k liftIO $ CID.recordContentIdentifier db (Remote.uuid remote) cid k
recordContentIdentifier (Remote.uuid remote) cid k recordContentIdentifier (Remote.uuid remote) cid k
{- Temporary key used for import of a ContentIdentifier while downloading
- content, before generating its real key. -}
importKey :: ContentIdentifier -> Integer -> Key
importKey (ContentIdentifier cid) size = stubKey
{ keyName = cid
, keyVariety = OtherKey "CID"
, keySize = Just size
}

View file

@ -253,7 +253,7 @@ seekRemote remote branch msubdir = allowConcurrentOutput $ do
parentcommit <- frombranch Git.Ref.sha parentcommit <- frombranch Git.Ref.sha
let importcommitconfig = ImportCommitConfig parentcommit ManualCommit importmessage let importcommitconfig = ImportCommitConfig parentcommit ManualCommit importmessage
importable <- download =<< enumerate importable <- download importtreeconfig =<< enumerate
void $ includeCommandAction $ void $ includeCommandAction $
commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable
where where
@ -275,9 +275,10 @@ seekRemote remote branch msubdir = allowConcurrentOutput $ do
showEndOk showEndOk
return importable return importable
download importablecontents = downloadImport remote importablecontents >>= \case download importtreeconfig importablecontents =
Nothing -> giveup $ "Failed to import some files from " ++ Remote.name remote ++ ". Re-run command to resume import." downloadImport remote importtreeconfig importablecontents >>= \case
Just importable -> return importable Nothing -> giveup $ "Failed to import some files from " ++ Remote.name remote ++ ". Re-run command to resume import."
Just importable -> return importable
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents Key -> CommandStart commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents Key -> CommandStart
commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable = do commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable = do

View file

@ -53,7 +53,7 @@ instance HasImportUnsupported (RemoteConfig -> RemoteGitConfig -> Annex Bool) wh
instance HasImportUnsupported (ImportActions Annex) where instance HasImportUnsupported (ImportActions Annex) where
importUnsupported = ImportActions importUnsupported = ImportActions
{ listImportableContents = return Nothing { listImportableContents = return Nothing
, retrieveExportWithContentIdentifier = \_ _ _ _ -> return Nothing , retrieveExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
, storeExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing , storeExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
} }

View file

@ -43,6 +43,7 @@ import Utility.Metered
import Git.Types (RemoteName) import Git.Types (RemoteName)
import Utility.SafeCommand import Utility.SafeCommand
import Utility.Url import Utility.Url
import Utility.DataUnits
type RemoteConfigKey = String type RemoteConfigKey = String
@ -243,11 +244,11 @@ data ExportActions a = ExportActions
data ImportActions a = ImportActions data ImportActions a = ImportActions
-- Finds the current set of files that are stored in the remote, -- Finds the current set of files that are stored in the remote,
-- along with their content identifiers. -- along with their content identifiers and size.
-- --
-- May also find old versions of files that are still stored in the -- May also find old versions of files that are still stored in the
-- remote. -- remote.
{ listImportableContents :: a (Maybe (ImportableContents ContentIdentifier)) { listImportableContents :: a (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
-- Retrieves a file from the remote. Ensures that the file -- Retrieves a file from the remote. Ensures that the file
-- it retrieves has the requested ContentIdentifier. -- it retrieves has the requested ContentIdentifier.
-- --
@ -257,9 +258,10 @@ data ImportActions a = ImportActions
, retrieveExportWithContentIdentifier , retrieveExportWithContentIdentifier
:: ExportLocation :: ExportLocation
-> ContentIdentifier -> ContentIdentifier
-> (FilePath -> a Key) -> FilePath
-- ^ callback that generates a key from the downloaded content, -- ^ file to write content to
-- it may rename or delete the file -> a (Maybe Key)
-- ^ callback that generates a key from the downloaded content
-> MeterUpdate -> MeterUpdate
-> a (Maybe Key) -> a (Maybe Key)
-- Exports content to an ExportLocation, and returns the -- Exports content to an ExportLocation, and returns the

View file

@ -10,13 +10,6 @@ this.
## implementation notes ## implementation notes
* import can run out of disk space. Should listImportableContents
include the size of the file, so that annex.diskreserve can be checked?
* Should retrieveExportWithContentIdentifier have a FilePath parameter,
to tell it the file to retrieve to? And if so, would it make sense to
have a ContentLocation -> Key, so it can use gitAnnexTmpObjectLocation?
* "git annex import master --from rmt" followed by "git annex import master:sub --from rmt" * "git annex import master --from rmt" followed by "git annex import master:sub --from rmt"
first makes the tracking branch contain only what's in the remote, first makes the tracking branch contain only what's in the remote,
and then grafts what's in the remote into a subdir. Is that the behavior and then grafts what's in the remote into a subdir. Is that the behavior