import downloader complete (untested)

Made some api changes.

listImportableContents needs to provide the size
of the data, so the downloader can check disk free space.

retrieveExportWithContentIdentifier is passed the filepath to write to

Use temporary "CID" key during download of a ContentIdentifier from a
remote, so withTmp can be used and then move the content to the real key
once it's known.
This commit is contained in:
Joey Hess 2019-02-27 13:15:02 -04:00
parent f4b773e9a1
commit 45aacd888b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 64 additions and 36 deletions

View file

@ -5,7 +5,7 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Import (
RemoteTrackingBranch(..),
@ -29,7 +29,12 @@ import qualified Git.Branch
import qualified Annex
import Annex.Link
import Annex.LockFile
import Annex.Content
import Backend
import Types.Key
import Types.KeySource
import Utility.Metered
import Utility.DataUnits
import Logs.Export
import Logs.ContentIdentifier
import qualified Database.Export as Export
@ -185,8 +190,8 @@ buildImportTrees basetree msubdir importable = History
- If any download fails, the whole thing fails, but it will resume where
- it left off.
-}
downloadImport :: Remote -> ImportableContents ContentIdentifier -> Annex (Maybe (ImportableContents Key))
downloadImport remote importablecontents = do
downloadImport :: Remote -> ImportTreeConfig -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (Maybe (ImportableContents Key))
downloadImport remote importtreeconfig importablecontents = do
-- This map is used to remember content identifiers that
-- were just downloaded, before they have necessarily been
-- stored in the database. This way, if the same content
@ -211,27 +216,45 @@ downloadImport remote importablecontents = do
(catMaybes l')
(catMaybes h')
download cidmap db (loc, cid) = getcidkey cidmap db cid >>= \case
download cidmap db (loc, (cid, sz)) = getcidkey cidmap db cid >>= \case
(k:_) -> return $ Just (loc, k)
[] -> do
-- TODO progresss bar
let p = nullMeterUpdate
let ia = Remote.importActions remote
Remote.retrieveExportWithContentIdentifier ia loc cid mkkey p >>= \case
Just k -> do
recordcidkey cidmap db cid k
return $ Just (loc, k)
Nothing -> return Nothing
[] -> checkDiskSpaceToGet tmpkey Nothing $
withTmp tmpkey $ \tmpfile ->
Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (ingestkey loc tmpfile) p >>= \case
Just k -> do
recordcidkey cidmap db cid k
return $ Just (loc, k)
Nothing -> return Nothing
where
-- TODO progress bar
p = nullMeterUpdate
ia = Remote.importActions remote
tmpkey = importKey cid sz
mkkey f = error "TODO"
ingestkey loc tmpfile = do
f <- fromRepo $ fromTopFilePath $ locworktreefilename loc
backend <- chooseBackend f
let ks = KeySource
{ keyFilename = f
, contentLocation = tmpfile
, inodeCache = Nothing
}
genKey ks backend >>= \case
Nothing -> return Nothing
Just (k, _) ->
tryNonAsync (moveAnnex k tmpfile) >>= \case
Right True -> return (Just k)
_ -> return Nothing
locworktreefilename loc = asTopFilePath $ case importtreeconfig of
ImportTree -> fromImportLocation loc
ImportSubTree subdir _ ->
getTopFilePath subdir </> fromImportLocation loc
getcidkey cidmap db cid = liftIO $
CID.getContentIdentifierKeys db (Remote.uuid remote) cid >>= \case
[] -> atomically $ do
m <- readTVar cidmap
-- force lookup inside STM transaction
let !v = maybeToList $ M.lookup cid m
return v
[] -> atomically $
maybeToList . M.lookup cid <$> readTVar cidmap
l -> return l
recordcidkey cidmap db cid k = do
@ -239,3 +262,12 @@ downloadImport remote importablecontents = do
M.insert cid k
liftIO $ CID.recordContentIdentifier db (Remote.uuid remote) cid k
recordContentIdentifier (Remote.uuid remote) cid k
{- Temporary key used for import of a ContentIdentifier while downloading
- content, before generating its real key. -}
importKey :: ContentIdentifier -> Integer -> Key
importKey (ContentIdentifier cid) size = stubKey
{ keyName = cid
, keyVariety = OtherKey "CID"
, keySize = Just size
}

View file

@ -253,7 +253,7 @@ seekRemote remote branch msubdir = allowConcurrentOutput $ do
parentcommit <- frombranch Git.Ref.sha
let importcommitconfig = ImportCommitConfig parentcommit ManualCommit importmessage
importable <- download =<< enumerate
importable <- download importtreeconfig =<< enumerate
void $ includeCommandAction $
commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable
where
@ -275,9 +275,10 @@ seekRemote remote branch msubdir = allowConcurrentOutput $ do
showEndOk
return importable
download importablecontents = downloadImport remote importablecontents >>= \case
Nothing -> giveup $ "Failed to import some files from " ++ Remote.name remote ++ ". Re-run command to resume import."
Just importable -> return importable
download importtreeconfig importablecontents =
downloadImport remote importtreeconfig importablecontents >>= \case
Nothing -> giveup $ "Failed to import some files from " ++ Remote.name remote ++ ". Re-run command to resume import."
Just importable -> return importable
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents Key -> CommandStart
commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable = do

View file

@ -53,7 +53,7 @@ instance HasImportUnsupported (RemoteConfig -> RemoteGitConfig -> Annex Bool) wh
instance HasImportUnsupported (ImportActions Annex) where
importUnsupported = ImportActions
{ listImportableContents = return Nothing
, retrieveExportWithContentIdentifier = \_ _ _ _ -> return Nothing
, retrieveExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
, storeExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
}

View file

@ -43,6 +43,7 @@ import Utility.Metered
import Git.Types (RemoteName)
import Utility.SafeCommand
import Utility.Url
import Utility.DataUnits
type RemoteConfigKey = String
@ -243,11 +244,11 @@ data ExportActions a = ExportActions
data ImportActions a = ImportActions
-- Finds the current set of files that are stored in the remote,
-- along with their content identifiers.
-- along with their content identifiers and size.
--
-- May also find old versions of files that are still stored in the
-- remote.
{ listImportableContents :: a (Maybe (ImportableContents ContentIdentifier))
{ listImportableContents :: a (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
-- Retrieves a file from the remote. Ensures that the file
-- it retrieves has the requested ContentIdentifier.
--
@ -257,9 +258,10 @@ data ImportActions a = ImportActions
, retrieveExportWithContentIdentifier
:: ExportLocation
-> ContentIdentifier
-> (FilePath -> a Key)
-- ^ callback that generates a key from the downloaded content,
-- it may rename or delete the file
-> FilePath
-- ^ file to write content to
-> a (Maybe Key)
-- ^ callback that generates a key from the downloaded content
-> MeterUpdate
-> a (Maybe Key)
-- Exports content to an ExportLocation, and returns the

View file

@ -10,13 +10,6 @@ this.
## implementation notes
* import can run out of disk space. Should listImportableContents
include the size of the file, so that annex.diskreserve can be checked?
* Should retrieveExportWithContentIdentifier have a FilePath parameter,
to tell it the file to retrieve to? And if so, would it make sense to
have a ContentLocation -> Key, so it can use gitAnnexTmpObjectLocation?
* "git annex import master --from rmt" followed by "git annex import master:sub --from rmt"
first makes the tracking branch contain only what's in the remote,
and then grafts what's in the remote into a subdir. Is that the behavior