Honor annex.largefiles when importing a tree from a special remote.
This commit was sponsored by Martin D on Patreon.
parent 1c1edad620
commit 7757c0e900
6 changed files with 96 additions and 35 deletions
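The gist of the change, as a minimal self-contained sketch rather than the actual git-annex code (all names below are illustrative): each file offered by the special remote either gets annexed as a Key, or is hashed straight into git as a blob, depending on whether annex.largefiles matches its name.

newtype Key = Key String deriving Show
newtype Sha = Sha String deriving Show

-- Decide how one imported file is stored, given an annex.largefiles
-- matcher and helpers for the two storage paths (all assumed here).
importEntry
    :: (FilePath -> Bool)    -- annex.largefiles matcher
    -> (FilePath -> IO Key)  -- annex the content, roughly what genKey does
    -> (FilePath -> IO Sha)  -- hash the content into git as a blob
    -> FilePath              -- name the file will have in the imported tree
    -> IO (Either Sha Key)
importEntry islarge annexit hashit f
    | islarge f = Right <$> annexit f  -- large file: annexed as before
    | otherwise = Left <$> hashit f    -- small file: stored directly in git

That Either Sha Key value is what now flows through buildImportCommit, buildImportTrees and downloadImport in the diff below.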
Annex/Import.hs | 102
@@ -1,6 +1,6 @@
 {- git-annex import from remotes
  -
- - Copyright 2019 Joey Hess <id@joeyh.name>
+ - Copyright 2019-2020 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -34,6 +34,7 @@ import Annex.LockFile
 import Annex.Content
 import Annex.Export
 import Annex.RemoteTrackingBranch
+import Annex.HashObject
 import Command
 import Backend
 import Types.Key
@@ -93,7 +94,7 @@ buildImportCommit
 	:: Remote
 	-> ImportTreeConfig
 	-> ImportCommitConfig
-	-> ImportableContents Key
+	-> ImportableContents (Either Sha Key)
 	-> Annex (Maybe Ref)
 buildImportCommit remote importtreeconfig importcommitconfig importable =
 	case importCommitTracking importcommitconfig of
@@ -246,7 +247,7 @@ buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History t
 buildImportTrees
 	:: Ref
 	-> Maybe TopFilePath
-	-> ImportableContents Key
+	-> ImportableContents (Either Sha Key)
 	-> Annex (History Sha)
 buildImportTrees basetree msubdir importable = History
 	<$> (buildtree (importableContents importable) =<< Annex.gitRepo)
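As a reading aid for the mktreeitem change in the next hunk, here is a simplified sketch (the types and names are stand-ins, not git-annex's): a Right Key still becomes a symlink tree entry pointing into the annex, while a Left Sha is already a git blob and is recorded as a regular file entry.

-- Stand-in types; the real code builds Git TreeItems instead.
data Entry sha = SymlinkEntry FilePath  -- target of the annex symlink
               | FileEntry sha          -- blob recorded as a regular file

treeEntryFor :: (key -> FilePath) -> Either sha key -> Entry sha
treeEntryFor annexLink v = case v of
    Right k  -> SymlinkEntry (annexLink k)  -- e.g. a path under .git/annex/objects
    Left sha -> FileEntry sha               -- content lives in git itself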
|
@@ -265,23 +266,31 @@ buildImportTrees basetree msubdir importable = History
 		Just subdir -> liftIO $
 			graftTree' importtree subdir basetree repo hdl
 
-	mktreeitem (loc, k) = do
-		let lf = fromImportLocation loc
-		let treepath = asTopFilePath lf
-		let topf = asTopFilePath $
+	mktreeitem (loc, v) = case v of
+		Right k -> do
+			relf <- fromRepo $ fromTopFilePath topf
+			symlink <- calcRepo $ gitAnnexLink (fromRawFilePath relf) k
+			linksha <- hashSymlink symlink
+			return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha
+		Left sha ->
+			return $ TreeItem treepath (fromTreeItemType TreeFile) sha
+	  where
+		lf = fromImportLocation loc
+		treepath = asTopFilePath lf
+		topf = asTopFilePath $
 			maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir
-		relf <- fromRepo $ fromTopFilePath topf
-		symlink <- calcRepo $ gitAnnexLink (fromRawFilePath relf) k
-		linksha <- hashSymlink symlink
-		return $ TreeItem treepath (fromTreeItemType TreeSymlink) linksha
 
-{- Downloads all new ContentIdentifiers as needed to generate Keys.
- - Supports concurrency when enabled.
+{- Downloads all new ContentIdentifiers. Supports concurrency when enabled.
  -
  - If any download fails, the whole thing fails with Nothing,
  - but it will resume where it left off.
+ -
+ - Generates either a Key or a git Sha, depending on annex.largefiles.
+ - Note that, when a ContentIdentifier has been imported before,
+ - annex.largefiles is not reapplied, so the content is stored the
+ - same way it was stored in the repo before.
  -}
-downloadImport :: Remote -> ImportTreeConfig -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (Maybe (ImportableContents Key))
+downloadImport :: Remote -> ImportTreeConfig -> ImportableContents (ContentIdentifier, ByteSize) -> Annex (Maybe (ImportableContents (Either Sha Key)))
 downloadImport remote importtreeconfig importablecontents = do
 	-- This map is used to remember content identifiers that
 	-- were just downloaded, before they have necessarily been
@@ -300,8 +309,9 @@ downloadImport remote importtreeconfig importablecontents = do
 		go False cidmap downloading importablecontents db
   where
 	go oldversion cidmap downloading (ImportableContents l h) db = do
+		largematcher <- largeFilesMatcher
 		jobs <- forM l $ \i ->
-			startdownload cidmap downloading db i oldversion
+			startdownload cidmap downloading db i oldversion largematcher
 		l' <- liftIO $ forM jobs $
 			either pure (atomically . takeTMVar)
 		if any isNothing l'
@@ -325,15 +335,25 @@
 			s <- readTVar downloading
 			writeTVar downloading $ S.delete cid s
 
-	startdownload cidmap downloading db i@(loc, (cid, _sz)) oldversion = getcidkey cidmap db cid >>= \case
-		(k:_) -> return $ Left $ Just (loc, k)
+	startdownload cidmap downloading db i@(loc, (cid, _sz)) oldversion largematcher = getcidkey cidmap db cid >>= \case
+		(k:ks) ->
+			-- If the same content was imported before
+			-- yielding multiple different keys, it's not clear
+			-- which is best to use this time, so pick the
+			-- first in the list. But, if any of them is a
+			-- git sha, use it, because the content must
+			-- be included in the git repo then.
+			let v = case mapMaybe keyGitSha (k:ks) of
+				(sha:_) -> Left sha
+				[] -> Right k
+			in return $ Left $ Just (loc, v)
 		[] -> do
 			job <- liftIO $ newEmptyTMVarIO
 			let ai = ActionItemOther (Just (fromRawFilePath (fromImportLocation loc)))
 			let downloadaction = starting ("import " ++ Remote.name remote) ai $ do
 				when oldversion $
 					showNote "old version"
-				tryNonAsync (download cidmap db i) >>= \case
+				tryNonAsync (download cidmap db i largematcher) >>= \case
 					Left e -> next $ do
 						warning (show e)
 						liftIO $ atomically $
@@ -349,17 +369,22 @@
 				downloadaction
 		return (Right job)
 
-	download cidmap db (loc, (cid, sz)) = do
+	download cidmap db (loc, (cid, sz)) largematcher = do
 		let downloader tmpfile p = do
-			k <- Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (mkkey loc tmpfile) p
-			ok <- moveAnnex k tmpfile
-			return (k, ok)
+			k <- Remote.retrieveExportWithContentIdentifier ia loc cid tmpfile (mkkey loc tmpfile largematcher) p
+			case keyGitSha k of
+				Nothing -> do
+					ok <- moveAnnex k tmpfile
+					when ok $ do
+						recordcidkey cidmap db cid k
+						logStatus k InfoPresent
+						logChange k (Remote.uuid remote) InfoPresent
+					return (Right k, ok)
+				Just sha -> do
+					recordcidkey cidmap db cid k
+					return (Left sha, True)
 		let rundownload tmpfile p = tryNonAsync (downloader tmpfile p) >>= \case
-			Right (k, True) -> do
-				recordcidkey cidmap db cid k
-				logStatus k InfoPresent
-				logChange k (Remote.uuid remote) InfoPresent
-				return $ Just (loc, k)
+			Right (v, True) -> return $ Just (loc, v)
 			Right (_, False) -> return Nothing
 			Left e -> do
 				warning (show e)
@@ -372,15 +397,24 @@
 	ia = Remote.importActions remote
 	tmpkey = importKey cid sz
 
-	mkkey loc tmpfile = do
+	mkkey loc tmpfile largematcher = do
 		f <- fromRepo $ fromTopFilePath $ locworktreefilename loc
-		backend <- chooseBackend (fromRawFilePath f)
-		let ks = KeySource
-			{ keyFilename = f
-			, contentLocation = toRawFilePath tmpfile
-			, inodeCache = Nothing
-			}
-		fst <$> genKey ks nullMeterUpdate backend
+		matcher <- largematcher (fromRawFilePath f)
+		let mi = MatchingFile FileInfo
+			{ matchFile = f
+			, currFile = toRawFilePath tmpfile
+			}
+		islargefile <- checkMatcher' matcher mi mempty
+		if islargefile
+			then do
+				backend <- chooseBackend (fromRawFilePath f)
+				let ks = KeySource
+					{ keyFilename = f
+					, contentLocation = toRawFilePath tmpfile
+					, inodeCache = Nothing
+					}
+				fst <$> genKey ks nullMeterUpdate backend
+			else gitShaKey <$> hashFile tmpfile
 
 	locworktreefilename loc = asTopFilePath $ case importtreeconfig of
 		ImportTree -> fromImportLocation loc
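The rule in the startdownload comment above, condensed into a small sketch (the first argument stands in for git-annex's keyGitSha; this is not the actual implementation): if earlier imports produced several keys for the same content, any git sha among them wins, since that content is already present in git, and otherwise the first key is reused.

import Data.Maybe (mapMaybe)

pickPreviousImport :: (key -> Maybe sha) -> [key] -> Maybe (Either sha key)
pickPreviousImport keyGitSha ks = case (mapMaybe keyGitSha ks, ks) of
    (sha : _, _) -> Just (Left sha)  -- some earlier import stored it in git
    ([], k : _)  -> Just (Right k)   -- reuse the first previously generated key
    ([], [])     -> Nothing          -- this content was never imported before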
@@ -1,5 +1,6 @@
 git-annex (8.20200618) UNRELEASED; urgency=medium
 
+  * Honor annex.largefiles when importing a tree from a special remote.
   * Fix a deadlock that could occur after git-annex got an unlocked
     file, causing the command to hang indefinitely. Known to happen on
     vfat filesystems, possibly others.
@@ -307,7 +307,7 @@ listContents remote tvar = starting "list" (ActionItemOther (Just (Remote.name r
 	liftIO $ atomically $ writeTVar tvar (Just importable')
 	return True
 
-commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents Key -> CommandStart
+commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents (Either Sha Key) -> CommandStart
 commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig importable =
 	starting "update" (ActionItemOther (Just $ fromRef $ fromRemoteTrackingBranch tb)) $ do
 		importcommit <- buildImportCommit remote importtreeconfig importcommitconfig importable
doc/devblog/day_625__import_tree_largefiles.mdwn | 13 (new file)
@@ -0,0 +1,13 @@
+New feature today: Implemented [[todo/import_tree_should_honor_annex.largefiles]].
+
+This only took an hour to implement, but I had to think for several hours
+first to get a solid understanding of it. Particularly, what happens if
+a file on a remote has a name that makes it be treated as non-large, but
+then in a later import, it's renamed to a name that would be treated as
+large? (Or vice-versa.) My conclusion is that this is equivalent to `git
+annex add` of a file with the first name followed by `git mv`, so it's
+ok for annex.largefiles to not take effect in such a case.
+
+----
+
+Today's work was sponsored by Martin D [on Patreon](https://patreon.com/joeyh).
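The rename question discussed in the devblog entry above can be modeled with a small sketch (an illustration of the described behaviour, not git-annex code): annex.largefiles is consulted only the first time a piece of content is imported, and a later import of the same content under a different name keeps the earlier decision, just as `git annex add` followed by `git mv` would.

import qualified Data.Map as M

decideOnImport
    :: Ord cid
    => M.Map cid (Either sha key)    -- decisions remembered from earlier imports
    -> (FilePath -> Either sha key)  -- fresh decision, consulting annex.largefiles
    -> cid                           -- content identifier reported by the remote
    -> FilePath                      -- name the content has in this import
    -> (Either sha key, M.Map cid (Either sha key))
decideOnImport seen fresh cid name = case M.lookup cid seen of
    Just v  -> (v, seen)  -- same content, possibly renamed: its earlier form is kept
    Nothing -> let v = fresh name in (v, M.insert cid v seen)  -- new: matcher decides now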
@@ -0,0 +1,11 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 3"""
+ date="2020-06-23T19:38:10Z"
+ content="""
+Good news, it's implemented, will be in the daily builds soon and the next
+release.
+
+Also you sponsored the [[day's work on it|devblog/day_625__import_tree_largefiles]] --
+thanks for the Patreon support!
+"""]]
@@ -69,3 +69,5 @@ annex.largefiles.
 > >
 > > Still, this doesn't feel like a reason not to implement the feature,
 > > necessarily.
+
+[[done]]