Honor annex.largefiles when importing a tree from a special remote.

This commit was sponsored by Martin D on Patreon.
Joey Hess 2020-06-23 16:07:18 -04:00
parent 1c1edad620
commit 7757c0e900
GPG key ID: DB12DB0FF05F8F38
6 changed files with 96 additions and 35 deletions

View file

@@ -1,6 +1,6 @@
 {- git-annex import from remotes
  -
- - Copyright 2019 Joey Hess <id@joeyh.name>
+ - Copyright 2019-2020 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -34,6 +34,7 @@ import Annex.LockFile
 import Annex.Content
 import Annex.Export
 import Annex.RemoteTrackingBranch
+import Annex.HashObject
 import Command
 import Backend
 import Types.Key
@@ -93,7 +94,7 @@ buildImportCommit
     :: Remote
     -> ImportTreeConfig
     -> ImportCommitConfig
-    -> ImportableContents Key
+    -> ImportableContents (Either Sha Key)
     -> Annex (Maybe Ref)
 buildImportCommit remote importtreeconfig importcommitconfig importable =
     case importCommitTracking importcommitconfig of
@@ -246,7 +247,7 @@ buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History t
 buildImportTrees
     :: Ref
     -> Maybe TopFilePath
-    -> ImportableContents Key
+    -> ImportableContents (Either Sha Key)
     -> Annex (History Sha)
 buildImportTrees basetree msubdir importable = History
     <$> (buildtree (importableContents importable) =<< Annex.gitRepo)
@@ -265,23 +266,31 @@ buildImportTrees basetree msubdir importable = History
         Just subdir -> liftIO $
             graftTree' importtree subdir basetree repo hdl
-    mktreeitem (loc, k) = do
-        let lf = fromImportLocation loc
-        let treepath = asTopFilePath lf
-        let topf = asTopFilePath $
-            maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir
-        relf <- fromRepo $ fromTopFilePath topf
-        symlink <- calcRepo $ gitAnnexLink (fromRawFilePath relf) k
-        linksha <- hashSymlink symlink
-        return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha
+    mktreeitem (loc, v) = case v of
+        Right k -> do
+            relf <- fromRepo $ fromTopFilePath topf
+            symlink <- calcRepo $ gitAnnexLink (fromRawFilePath relf) k
+            linksha <- hashSymlink symlink
+            return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha
+        Left sha ->
+            return $ TreeItem treepath (fromTreeItemType TreeFile) sha
+      where
+        lf = fromImportLocation loc
+        treepath = asTopFilePath lf
+        topf = asTopFilePath $
+            maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir
 
-{- Downloads all new ContentIdentifiers as needed to generate Keys.
- - Supports concurrency when enabled.
+{- Downloads all new ContentIdentifiers. Supports concurrency when enabled.
  -
  - If any download fails, the whole thing fails with Nothing,
  - but it will resume where it left off.
+ -
+ - Generates either a Key or a git Sha, depending on annex.largefiles.
+ - Note that, when a ContentIdentifier has been imported before,
+ - annex.largefiles is not reapplied, so it will result in however that
+ - content was stored in the repo before.
  -}
-downloadImport :: Remote -> ImportTreeConfig -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (Maybe (ImportableContents Key))
+downloadImport :: Remote -> ImportTreeConfig -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (Maybe (ImportableContents (Either Sha Key)))
 downloadImport remote importtreeconfig importablecontents = do
     -- This map is used to remember content identifiers that
     -- were just downloaded, before they have necessarily been
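A note on the new return type above: downloadImport now yields Either Sha Key values, where Left is a git blob sha for content that annex.largefiles classifies as not large (so it is stored directly in git), and Right is an annex Key for large content. The following is a minimal, self-contained Haskell sketch of that convention, using simplified stand-in types rather than git-annex's own Sha and Key:

    -- Hypothetical stand-in types; the real code uses git-annex's Sha and Key.
    newtype Sha = Sha String
    newtype Key = Key String

    -- Left: small per annex.largefiles, stored directly as a git blob.
    -- Right: large, annexed under a key (represented in the tree as a symlink).
    describeImported :: Either Sha Key -> String
    describeImported (Left (Sha s)) = "small file, git blob " ++ s
    describeImported (Right (Key k)) = "large file, annexed as " ++ k

    main :: IO ()
    main = mapM_ (putStrLn . describeImported)
        [ Left (Sha "4b825dc6")
        , Right (Key "SHA256E-s1048576--0123abcd")
        ]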
@@ -300,8 +309,9 @@ downloadImport remote importtreeconfig importablecontents = do
         go False cidmap downloading importablecontents db
   where
     go oldversion cidmap downloading (ImportableContents l h) db = do
+        largematcher <- largeFilesMatcher
         jobs <- forM l $ \i ->
-            startdownload cidmap downloading db i oldversion
+            startdownload cidmap downloading db i oldversion largematcher
         l' <- liftIO $ forM jobs $
             either pure (atomically . takeTMVar)
         if any isNothing l'
@@ -325,15 +335,25 @@ downloadImport remote importtreeconfig importablecontents = do
             s <- readTVar downloading
             writeTVar downloading $ S.delete cid s
 
-    startdownload cidmap downloading db i@(loc, (cid, _sz)) oldversion = getcidkey cidmap db cid >>= \case
-        (k:_) -> return $ Left $ Just (loc, k)
+    startdownload cidmap downloading db i@(loc, (cid, _sz)) oldversion largematcher = getcidkey cidmap db cid >>= \case
+        (k:ks) ->
+            -- If the same content was imported before
+            -- yielding multiple different keys, it's not clear
+            -- which is best to use this time, so pick the
+            -- first in the list. But, if any of them is a
+            -- git sha, use it, because the content must
+            -- be included in the git repo then.
+            let v = case mapMaybe keyGitSha (k:ks) of
+                    (sha:_) -> Left sha
+                    [] -> Right k
+            in return $ Left $ Just (loc, v)
         [] -> do
             job <- liftIO $ newEmptyTMVarIO
             let ai = ActionItemOther (Just (fromRawFilePath (fromImportLocation loc)))
             let downloadaction = starting ("import " ++ Remote.name remote) ai $ do
                 when oldversion $
                     showNote "old version"
-                tryNonAsync (download cidmap db i) >>= \case
+                tryNonAsync (download cidmap db i largematcher) >>= \case
                     Left e -> next $ do
                         warning (show e)
                         liftIO $ atomically $
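The comment added to startdownload above describes how to choose between the keys recorded for a content identifier that was imported before. Here is a self-contained sketch of that selection rule (generic names, not the git-annex API; the real code applies keyGitSha to the keys returned by getcidkey):

    import Data.Maybe (mapMaybe)

    -- Prefer any key that records a git sha, since that content is already
    -- present in the git repo; otherwise fall back to the first key.
    pickPrevious :: (key -> Maybe sha) -> key -> [key] -> Either sha key
    pickPrevious toGitSha k ks = case mapMaybe toGitSha (k:ks) of
        (sha:_) -> Left sha
        [] -> Right k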
@@ -349,17 +369,22 @@ downloadImport remote importtreeconfig importablecontents = do
                     downloadaction
             return (Right job)
 
-    download cidmap db (loc, (cid, sz)) = do
+    download cidmap db (loc, (cid, sz)) largematcher = do
         let downloader tmpfile p = do
-            k <- Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (mkkey loc tmpfile) p
-            ok <- moveAnnex k tmpfile
-            return (k, ok)
+            k <- Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (mkkey loc tmpfile largematcher) p
+            case keyGitSha k of
+                Nothing -> do
+                    ok <- moveAnnex k tmpfile
+                    when ok $ do
+                        recordcidkey cidmap db cid k
+                        logStatus k InfoPresent
+                        logChange k (Remote.uuid remote) InfoPresent
+                    return (Right k, ok)
+                Just sha -> do
+                    recordcidkey cidmap db cid k
+                    return (Left sha, True)
         let rundownload tmpfile p = tryNonAsync (downloader tmpfile p) >>= \case
-            Right (k, True) -> do
-                recordcidkey cidmap db cid k
-                logStatus k InfoPresent
-                logChange k (Remote.uuid remote) InfoPresent
-                return $ Just (loc, k)
+            Right (v, True) -> return $ Just (loc, v)
             Right (_, False) -> return Nothing
             Left e -> do
                 warning (show e)
@@ -372,15 +397,24 @@ downloadImport remote importtreeconfig importablecontents = do
         ia = Remote.importActions remote
         tmpkey = importKey cid sz
 
-    mkkey loc tmpfile = do
+    mkkey loc tmpfile largematcher = do
         f <- fromRepo $ fromTopFilePath $ locworktreefilename loc
-        backend <- chooseBackend (fromRawFilePath f)
-        let ks = KeySource
-            { keyFilename = f
-            , contentLocation = toRawFilePath tmpfile
-            , inodeCache = Nothing
-            }
-        fst <$> genKey ks nullMeterUpdate backend
+        matcher <- largematcher (fromRawFilePath f)
+        let mi = MatchingFile FileInfo
+            { matchFile = f
+            , currFile = toRawFilePath tmpfile
+            }
+        islargefile <- checkMatcher' matcher mi mempty
+        if islargefile
+            then do
+                backend <- chooseBackend (fromRawFilePath f)
+                let ks = KeySource
+                    { keyFilename = f
+                    , contentLocation = toRawFilePath tmpfile
+                    , inodeCache = Nothing
+                    }
+                fst <$> genKey ks nullMeterUpdate backend
+            else gitShaKey <$> hashFile tmpfile
 
     locworktreefilename loc = asTopFilePath $ case importtreeconfig of
         ImportTree -> fromImportLocation loc
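The reworked mkkey above is where annex.largefiles actually takes effect: the matcher is checked against the worktree filename the import will use, and only matching (large) files get an annex key, while everything else is hashed into git as a blob. Below is a simplified, self-contained sketch of that decision; the predicate and the two actions are hypothetical stand-ins for git-annex's checkMatcher', genKey, and hashFile:

    -- Stand-in result type: either a git blob sha or an annex key.
    data Stored = GitBlob String | Annexed String
        deriving Show

    -- Decide how an imported file is stored, given an annex.largefiles-style
    -- predicate and actions producing the two kinds of identifiers.
    storeImported
        :: (FilePath -> IO Bool)   -- does annex.largefiles match this name?
        -> (FilePath -> IO String) -- generate an annex key for the content
        -> (FilePath -> IO String) -- hash the content into git as a blob
        -> FilePath                -- worktree filename the import will use
        -> FilePath                -- temp file holding the downloaded content
        -> IO Stored
    storeImported islarge genkey hashblob worktreefile tmpfile = do
        large <- islarge worktreefile
        if large
            then Annexed <$> genkey tmpfile
            else GitBlob <$> hashblob tmpfile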

View file

@@ -1,5 +1,6 @@
 git-annex (8.20200618) UNRELEASED; urgency=medium
 
+  * Honor annex.largefiles when importing a tree from a special remote.
   * Fix a deadlock that could occur after git-annex got an unlocked
     file, causing the command to hang indefinitely. Known to happen on
     vfat filesystems, possibly others.

View file

@@ -307,7 +307,7 @@ listContents remote tvar = starting "list" (ActionItemOther (Just (Remote.name r
         liftIO $ atomically $ writeTVar tvar (Just importable')
         return True
 
-commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents Key -> CommandStart
+commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents (Either Sha Key) -> CommandStart
 commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig importable =
     starting "update" (ActionItemOther (Just $ fromRef $ fromRemoteTrackingBranch tb)) $ do
         importcommit <- buildImportCommit remote importtreeconfig importcommitconfig importable

View file

@@ -0,0 +1,13 @@
New feature today: Implemented [[todo/import_tree_should_honor_annex.largefiles]].
This only took an hour to implement, but I had to think for several hours
first to get a solid understanding of it. Particularly, what happens if
a file on a remote has a name that makes it be treated as non-large, but
then in a later import, it's renamed to a name that would be treated as
large? (Or vice-versa.) My conclusion is that it is equivalent to `git
annex add` of a file with the first name followed by `git mv`, so it's
ok for annex.largefiles to not take effect in such a case.
----
Today's work was sponsored by Martin D [on Patreon](https://patreon.com/joeyh).

View file

@@ -0,0 +1,11 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2020-06-23T19:38:10Z"
content="""
Good news: it's implemented, and it will be in the daily builds soon and
in the next release.
Also you sponsored the [[day's work on it|devblog/day_625__import_tree_largefiles]] --
thanks for the Patreon support!
"""]]

View file

@@ -69,3 +69,5 @@ annex.largefiles.
 > >
 > > Still, this doesn't feel like a reason not to implement the feature,
 > > necessarily.
+
+[[done]]