listImportableContents filtering to wanted files

This could in theory allow importing subsets of files with less memory
use. Rather than building up a big import list and then filtering it to
a smaller list of wanted files, support optionally filtering wanted
files first.

So far, the directory special remote implements it and will probably use
less memory. (Since dirContentsRecursiveSkipping does lazy streaming.)

Implementation in Remote.S3 is incomplete and fails to compile. Bit of a
mess with ResourceT needing to use Annex.

Also, in Remote.S3, filtering is not done for old versions.
And mkImportableContentsUnversioned is doing now redundant work
to filterwanted.
This commit is contained in:
Joey Hess 2023-12-20 15:48:26 -04:00
parent d7ca716759
commit 41edf73789
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 112 additions and 50 deletions

View file

@ -293,7 +293,7 @@ buildImportTrees basetree msubdir (ImportedDiff (LastImportedTree oldtree) impor
let (removed, new) = partition isremoved
(importableContents imported)
newtreeitems <- catMaybes <$> mapM mktreeitem new
let removedfiles = map (mkloc . fst) removed
let removedfiles = map (\(_, loc, _) -> mkloc loc) removed
inRepo $ adjustTree
(pure . Just)
-- ^ keep files that are not added/removed the same
@ -303,14 +303,14 @@ buildImportTrees basetree msubdir (ImportedDiff (LastImportedTree oldtree) impor
removedfiles
oldtree
mktreeitem (loc, DiffChanged v) =
mktreeitem (_, loc, DiffChanged v) =
Just <$> mkImportTreeItem msubdir loc v
mktreeitem (_, DiffRemoved) =
mktreeitem (_, _, DiffRemoved) =
pure Nothing
mkloc = asTopFilePath . fromImportLocation
isremoved (_, v) = v == DiffRemoved
isremoved (_, _, v) = v == DiffRemoved
convertImportTree :: Maybe TopFilePath -> [(ImportLocation, Either Sha Key)] -> Annex Tree
convertImportTree msubdir ls =
@ -429,11 +429,12 @@ buildImportTree
:: (Maybe TopFilePath -> [(ImportLocation, v)] -> Annex Tree)
-> Ref
-> Maybe TopFilePath
-> [(ImportLocation, v)]
-> [(ImportWantedChecked, ImportLocation, v)]
-> MkTreeHandle
-> Annex Sha
buildImportTree converttree basetree msubdir ls hdl = do
importtree <- liftIO . recordTree' hdl =<< converttree msubdir ls
importtree <- liftIO . recordTree' hdl
=<< converttree msubdir (map (\(_, loc, v) -> (loc, v)) ls)
graftImportTree basetree msubdir importtree hdl
graftImportTree
@ -545,7 +546,7 @@ importChanges remote importtreeconfig importcontent thirdpartypopulated importab
let (removed, changed) = partition isremoval diff
let mkicchanged ti = do
v <- M.lookup (Git.DiffTree.dstsha ti) cidtreemap
return (mkloc ti, v)
return (ImportWantedChecked False, mkloc ti, v)
let ic = ImportableContentsComplete $ ImportableContents
{ importableContents = mapMaybe mkicchanged changed
, importableHistory = []
@ -576,10 +577,10 @@ importChanges remote importtreeconfig importcontent thirdpartypopulated importab
}
where
diffchanged = map
(\(loc, v) -> (loc, DiffChanged v))
(\(wantedchecked, loc, v) -> (wantedchecked, loc, DiffChanged v))
(importableContents ic)
diffremoved = map
(\ti -> (mkloc ti, DiffRemoved))
(\ti -> (ImportWantedChecked False, mkloc ti, DiffRemoved))
removed
{- Gets the tree that was last imported from the remote
@ -671,7 +672,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
then return Nothing
else gohistory largematcher cidmap importing db h >>= return . \case
Nothing -> Nothing
Just h' -> Just $ ImportableContents (catMaybes l') h'
Just h' -> Just $ ImportableContents (map (\(loc, k) -> (ImportWantedChecked False, loc, k)) $ catMaybes l') h'
gohistory largematcher cidmap importing db h = do
h' <- mapM (go True largematcher cidmap importing db) h
@ -689,7 +690,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
| otherwise = do
l <- forM (importableContentsSubTree c) $ \(loc, i) -> do
let loc' = importableContentsChunkFullLocation (importableContentsSubDir c) loc
thirdpartypopulatedimport db (loc', i) >>= return . \case
thirdpartypopulatedimport db (ImportWantedChecked False, loc', i) >>= return . \case
Just (_loc, k) -> Just (loc, k)
Nothing -> Nothing
return $ ImportableContentsChunk
@ -713,7 +714,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
s <- readTVar importing
writeTVar importing $ S.delete cid s
startimport cidmap importing db i@(loc, (cid, _sz)) oldversion largematcher = getcidkey cidmap db cid >>= \case
startimport cidmap importing db i@(_, loc, (cid, _sz)) oldversion largematcher = getcidkey cidmap db cid >>= \case
(k:ks) ->
-- If the same content was imported before
-- yielding multiple different keys, it's not clear
@ -748,7 +749,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
importaction
return (Right job)
thirdpartypopulatedimport db (loc, (cid, sz)) =
thirdpartypopulatedimport db (_, loc, (cid, sz)) =
case Remote.importKey ia of
Nothing -> return Nothing
Just importkey ->
@ -762,7 +763,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
warning (UnquotedString (show e))
return Nothing
importordownload cidmap (loc, (cid, sz)) largematcher = do
importordownload cidmap (_, loc, (cid, sz)) largematcher = do
f <- locworktreefile loc
matcher <- largematcher f
-- When importing a key is supported, always use it rather
@ -1024,18 +1025,17 @@ pruneImportMatcher = Utility.Matcher.pruneMatcher matchNeedsKey
-}
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
getImportableContents r importtreeconfig ci matcher = do
Remote.listImportableContents (Remote.importActions r) >>= \case
Just (ImportableContentsComplete ic) -> do
dbhandle <- opendbhandle
dbhandle <- opendbhandle
Remote.listImportableContents (Remote.importActions r) (wanted dbhandle) >>= \case
Just (ImportableContentsComplete ic) ->
Just . ImportableContentsComplete
<$> filterunwanted dbhandle ic
Just (c@(ImportableContentsChunked {})) -> do
dbhandle <- opendbhandle
Just (c@(ImportableContentsChunked {})) ->
Just <$> filterunwantedchunked dbhandle c
Nothing -> return Nothing
where
filterunwanted dbhandle ic = ImportableContents
<$> filterM (wanted dbhandle) (importableContents ic)
<$> filterM (wanted' dbhandle) (importableContents ic)
<*> mapM (filterunwanted dbhandle) (importableHistory ic)
filterunwantedchunked dbhandle c = ImportableContentsChunked
@ -1057,7 +1057,11 @@ getImportableContents r importtreeconfig ci matcher = do
void $ Export.updateExportTreeFromLog h
return h
wanted dbhandle (loc, (_cid, sz))
wanted dbhandle loc sz = ImportWantedChecked
<$> wanted' dbhandle (ImportWantedChecked False, loc, ((), sz))
wanted' dbhandle (ImportWantedChecked alreadychecked, loc, (_cid, sz))
| alreadychecked = pure True
| ingitdir = pure False
| otherwise =
isknown <||> (matches <&&> notignored)
@ -1069,7 +1073,7 @@ getImportableContents r importtreeconfig ci matcher = do
notignored = notIgnoredImportLocation importtreeconfig ci loc
wantedunder dbhandle root (loc, v) =
wanted dbhandle (importableContentsChunkFullLocation root loc, v)
wanted' dbhandle (ImportWantedChecked False, importableContentsChunkFullLocation root loc, v)
isKnownImportLocation :: Export.ExportHandle -> ImportLocation -> Annex Bool
isKnownImportLocation dbhandle loc = liftIO $