import downloader complete (untested)
Made some api changes. listImportableContents needs to provide the size of the data, so the downloader can check disk free space. retrieveExportWithContentIdentifier is passed the filepath to write to Use temporary "CID" key during download of a ContentIdentifier from a remote, so withTmp can be used and then move the content to the real key once it's known.
This commit is contained in:
parent
f4b773e9a1
commit
45aacd888b
5 changed files with 64 additions and 36 deletions
|
@ -5,7 +5,7 @@
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
{-# LANGUAGE BangPatterns #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
module Annex.Import (
|
module Annex.Import (
|
||||||
RemoteTrackingBranch(..),
|
RemoteTrackingBranch(..),
|
||||||
|
@ -29,7 +29,12 @@ import qualified Git.Branch
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import Annex.Link
|
import Annex.Link
|
||||||
import Annex.LockFile
|
import Annex.LockFile
|
||||||
|
import Annex.Content
|
||||||
|
import Backend
|
||||||
|
import Types.Key
|
||||||
|
import Types.KeySource
|
||||||
import Utility.Metered
|
import Utility.Metered
|
||||||
|
import Utility.DataUnits
|
||||||
import Logs.Export
|
import Logs.Export
|
||||||
import Logs.ContentIdentifier
|
import Logs.ContentIdentifier
|
||||||
import qualified Database.Export as Export
|
import qualified Database.Export as Export
|
||||||
|
@ -185,8 +190,8 @@ buildImportTrees basetree msubdir importable = History
|
||||||
- If any download fails, the whole thing fails, but it will resume where
|
- If any download fails, the whole thing fails, but it will resume where
|
||||||
- it left off.
|
- it left off.
|
||||||
-}
|
-}
|
||||||
downloadImport :: Remote -> ImportableContents ContentIdentifier -> Annex (Maybe (ImportableContents Key))
|
downloadImport :: Remote -> ImportTreeConfig -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (Maybe (ImportableContents Key))
|
||||||
downloadImport remote importablecontents = do
|
downloadImport remote importtreeconfig importablecontents = do
|
||||||
-- This map is used to remember content identifiers that
|
-- This map is used to remember content identifiers that
|
||||||
-- were just downloaded, before they have necessarily been
|
-- were just downloaded, before they have necessarily been
|
||||||
-- stored in the database. This way, if the same content
|
-- stored in the database. This way, if the same content
|
||||||
|
@ -211,27 +216,45 @@ downloadImport remote importablecontents = do
|
||||||
(catMaybes l')
|
(catMaybes l')
|
||||||
(catMaybes h')
|
(catMaybes h')
|
||||||
|
|
||||||
download cidmap db (loc, cid) = getcidkey cidmap db cid >>= \case
|
download cidmap db (loc, (cid, sz)) = getcidkey cidmap db cid >>= \case
|
||||||
(k:_) -> return $ Just (loc, k)
|
(k:_) -> return $ Just (loc, k)
|
||||||
[] -> do
|
[] -> checkDiskSpaceToGet tmpkey Nothing $
|
||||||
-- TODO progresss bar
|
withTmp tmpkey $ \tmpfile ->
|
||||||
let p = nullMeterUpdate
|
Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (ingestkey loc tmpfile) p >>= \case
|
||||||
let ia = Remote.importActions remote
|
Just k -> do
|
||||||
Remote.retrieveExportWithContentIdentifier ia loc cid mkkey p >>= \case
|
recordcidkey cidmap db cid k
|
||||||
Just k -> do
|
return $ Just (loc, k)
|
||||||
recordcidkey cidmap db cid k
|
Nothing -> return Nothing
|
||||||
return $ Just (loc, k)
|
where
|
||||||
Nothing -> return Nothing
|
-- TODO progress bar
|
||||||
|
p = nullMeterUpdate
|
||||||
|
ia = Remote.importActions remote
|
||||||
|
tmpkey = importKey cid sz
|
||||||
|
|
||||||
mkkey f = error "TODO"
|
ingestkey loc tmpfile = do
|
||||||
|
f <- fromRepo $ fromTopFilePath $ locworktreefilename loc
|
||||||
|
backend <- chooseBackend f
|
||||||
|
let ks = KeySource
|
||||||
|
{ keyFilename = f
|
||||||
|
, contentLocation = tmpfile
|
||||||
|
, inodeCache = Nothing
|
||||||
|
}
|
||||||
|
genKey ks backend >>= \case
|
||||||
|
Nothing -> return Nothing
|
||||||
|
Just (k, _) ->
|
||||||
|
tryNonAsync (moveAnnex k tmpfile) >>= \case
|
||||||
|
Right True -> return (Just k)
|
||||||
|
_ -> return Nothing
|
||||||
|
|
||||||
|
locworktreefilename loc = asTopFilePath $ case importtreeconfig of
|
||||||
|
ImportTree -> fromImportLocation loc
|
||||||
|
ImportSubTree subdir _ ->
|
||||||
|
getTopFilePath subdir </> fromImportLocation loc
|
||||||
|
|
||||||
getcidkey cidmap db cid = liftIO $
|
getcidkey cidmap db cid = liftIO $
|
||||||
CID.getContentIdentifierKeys db (Remote.uuid remote) cid >>= \case
|
CID.getContentIdentifierKeys db (Remote.uuid remote) cid >>= \case
|
||||||
[] -> atomically $ do
|
[] -> atomically $
|
||||||
m <- readTVar cidmap
|
maybeToList . M.lookup cid <$> readTVar cidmap
|
||||||
-- force lookup inside STM transaction
|
|
||||||
let !v = maybeToList $ M.lookup cid m
|
|
||||||
return v
|
|
||||||
l -> return l
|
l -> return l
|
||||||
|
|
||||||
recordcidkey cidmap db cid k = do
|
recordcidkey cidmap db cid k = do
|
||||||
|
@ -239,3 +262,12 @@ downloadImport remote importablecontents = do
|
||||||
M.insert cid k
|
M.insert cid k
|
||||||
liftIO $ CID.recordContentIdentifier db (Remote.uuid remote) cid k
|
liftIO $ CID.recordContentIdentifier db (Remote.uuid remote) cid k
|
||||||
recordContentIdentifier (Remote.uuid remote) cid k
|
recordContentIdentifier (Remote.uuid remote) cid k
|
||||||
|
|
||||||
|
{- Temporary key used for import of a ContentIdentifier while downloading
|
||||||
|
- content, before generating its real key. -}
|
||||||
|
importKey :: ContentIdentifier -> Integer -> Key
|
||||||
|
importKey (ContentIdentifier cid) size = stubKey
|
||||||
|
{ keyName = cid
|
||||||
|
, keyVariety = OtherKey "CID"
|
||||||
|
, keySize = Just size
|
||||||
|
}
|
||||||
|
|
|
@ -253,7 +253,7 @@ seekRemote remote branch msubdir = allowConcurrentOutput $ do
|
||||||
parentcommit <- frombranch Git.Ref.sha
|
parentcommit <- frombranch Git.Ref.sha
|
||||||
let importcommitconfig = ImportCommitConfig parentcommit ManualCommit importmessage
|
let importcommitconfig = ImportCommitConfig parentcommit ManualCommit importmessage
|
||||||
|
|
||||||
importable <- download =<< enumerate
|
importable <- download importtreeconfig =<< enumerate
|
||||||
void $ includeCommandAction $
|
void $ includeCommandAction $
|
||||||
commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable
|
commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable
|
||||||
where
|
where
|
||||||
|
@ -275,9 +275,10 @@ seekRemote remote branch msubdir = allowConcurrentOutput $ do
|
||||||
showEndOk
|
showEndOk
|
||||||
return importable
|
return importable
|
||||||
|
|
||||||
download importablecontents = downloadImport remote importablecontents >>= \case
|
download importtreeconfig importablecontents =
|
||||||
Nothing -> giveup $ "Failed to import some files from " ++ Remote.name remote ++ ". Re-run command to resume import."
|
downloadImport remote importtreeconfig importablecontents >>= \case
|
||||||
Just importable -> return importable
|
Nothing -> giveup $ "Failed to import some files from " ++ Remote.name remote ++ ". Re-run command to resume import."
|
||||||
|
Just importable -> return importable
|
||||||
|
|
||||||
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents Key -> CommandStart
|
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents Key -> CommandStart
|
||||||
commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable = do
|
commitRemote remote branch tb parentcommit importtreeconfig importcommitconfig importable = do
|
||||||
|
|
|
@ -53,7 +53,7 @@ instance HasImportUnsupported (RemoteConfig -> RemoteGitConfig -> Annex Bool) wh
|
||||||
instance HasImportUnsupported (ImportActions Annex) where
|
instance HasImportUnsupported (ImportActions Annex) where
|
||||||
importUnsupported = ImportActions
|
importUnsupported = ImportActions
|
||||||
{ listImportableContents = return Nothing
|
{ listImportableContents = return Nothing
|
||||||
, retrieveExportWithContentIdentifier = \_ _ _ _ -> return Nothing
|
, retrieveExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
|
||||||
, storeExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
|
, storeExportWithContentIdentifier = \_ _ _ _ _ -> return Nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,7 @@ import Utility.Metered
|
||||||
import Git.Types (RemoteName)
|
import Git.Types (RemoteName)
|
||||||
import Utility.SafeCommand
|
import Utility.SafeCommand
|
||||||
import Utility.Url
|
import Utility.Url
|
||||||
|
import Utility.DataUnits
|
||||||
|
|
||||||
type RemoteConfigKey = String
|
type RemoteConfigKey = String
|
||||||
|
|
||||||
|
@ -243,11 +244,11 @@ data ExportActions a = ExportActions
|
||||||
|
|
||||||
data ImportActions a = ImportActions
|
data ImportActions a = ImportActions
|
||||||
-- Finds the current set of files that are stored in the remote,
|
-- Finds the current set of files that are stored in the remote,
|
||||||
-- along with their content identifiers.
|
-- along with their content identifiers and size.
|
||||||
--
|
--
|
||||||
-- May also find old versions of files that are still stored in the
|
-- May also find old versions of files that are still stored in the
|
||||||
-- remote.
|
-- remote.
|
||||||
{ listImportableContents :: a (Maybe (ImportableContents ContentIdentifier))
|
{ listImportableContents :: a (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
||||||
-- Retrieves a file from the remote. Ensures that the file
|
-- Retrieves a file from the remote. Ensures that the file
|
||||||
-- it retrieves has the requested ContentIdentifier.
|
-- it retrieves has the requested ContentIdentifier.
|
||||||
--
|
--
|
||||||
|
@ -257,9 +258,10 @@ data ImportActions a = ImportActions
|
||||||
, retrieveExportWithContentIdentifier
|
, retrieveExportWithContentIdentifier
|
||||||
:: ExportLocation
|
:: ExportLocation
|
||||||
-> ContentIdentifier
|
-> ContentIdentifier
|
||||||
-> (FilePath -> a Key)
|
-> FilePath
|
||||||
-- ^ callback that generates a key from the downloaded content,
|
-- ^ file to write content to
|
||||||
-- it may rename or delete the file
|
-> a (Maybe Key)
|
||||||
|
-- ^ callback that generates a key from the downloaded content
|
||||||
-> MeterUpdate
|
-> MeterUpdate
|
||||||
-> a (Maybe Key)
|
-> a (Maybe Key)
|
||||||
-- Exports content to an ExportLocation, and returns the
|
-- Exports content to an ExportLocation, and returns the
|
||||||
|
|
|
@ -10,13 +10,6 @@ this.
|
||||||
|
|
||||||
## implementation notes
|
## implementation notes
|
||||||
|
|
||||||
* import can run out of disk space. Should listImportableContents
|
|
||||||
include the size of the file, so that annex.diskreserve can be checked?
|
|
||||||
|
|
||||||
* Should retrieveExportWithContentIdentifier have a FilePath parameter,
|
|
||||||
to tell it the file to retrieve to? And if so, would it make sense to
|
|
||||||
have a ContentLocation -> Key, so it can use gitAnnexTmpObjectLocation?
|
|
||||||
|
|
||||||
* "git annex import master --from rmt" followed by "git annex import master:sub --from rmt"
|
* "git annex import master --from rmt" followed by "git annex import master:sub --from rmt"
|
||||||
first makes the tracking branch contain only what's in the remote,
|
first makes the tracking branch contain only what's in the remote,
|
||||||
and then grafts what's in the remote into a subdir. Is that the behavior
|
and then grafts what's in the remote into a subdir. Is that the behavior
|
||||||
|
|
Loading…
Reference in a new issue