listImportableContents filtering to wanted files
This could in theory allow importing subsets of files with less memory use. Rather than building up a big import list and then filtering it down to a smaller list of wanted files, support optionally filtering wanted files first. So far, the directory special remote implements it, and it will probably use less memory (since dirContentsRecursiveSkipping does lazy streaming). The implementation in Remote.S3 is incomplete and fails to compile; it is a bit of a mess, with ResourceT needing to use Annex. Also, in Remote.S3, filtering is not done for old versions, and mkImportableContentsUnversioned now does work that is redundant with filterwanted.
This commit is contained in:
parent
d7ca716759
commit
41edf73789
7 changed files with 112 additions and 50 deletions
|
@ -293,7 +293,7 @@ buildImportTrees basetree msubdir (ImportedDiff (LastImportedTree oldtree) impor
|
|||
let (removed, new) = partition isremoved
|
||||
(importableContents imported)
|
||||
newtreeitems <- catMaybes <$> mapM mktreeitem new
|
||||
let removedfiles = map (mkloc . fst) removed
|
||||
let removedfiles = map (\(_, loc, _) -> mkloc loc) removed
|
||||
inRepo $ adjustTree
|
||||
(pure . Just)
|
||||
-- ^ keep files that are not added/removed the same
|
||||
|
@ -303,14 +303,14 @@ buildImportTrees basetree msubdir (ImportedDiff (LastImportedTree oldtree) impor
|
|||
removedfiles
|
||||
oldtree
|
||||
|
||||
mktreeitem (loc, DiffChanged v) =
|
||||
mktreeitem (_, loc, DiffChanged v) =
|
||||
Just <$> mkImportTreeItem msubdir loc v
|
||||
mktreeitem (_, DiffRemoved) =
|
||||
mktreeitem (_, _, DiffRemoved) =
|
||||
pure Nothing
|
||||
|
||||
mkloc = asTopFilePath . fromImportLocation
|
||||
|
||||
isremoved (_, v) = v == DiffRemoved
|
||||
isremoved (_, _, v) = v == DiffRemoved
|
||||
|
||||
convertImportTree :: Maybe TopFilePath -> [(ImportLocation, Either Sha Key)] -> Annex Tree
|
||||
convertImportTree msubdir ls =
|
||||
|
@ -429,11 +429,12 @@ buildImportTree
|
|||
:: (Maybe TopFilePath -> [(ImportLocation, v)] -> Annex Tree)
|
||||
-> Ref
|
||||
-> Maybe TopFilePath
|
||||
-> [(ImportLocation, v)]
|
||||
-> [(ImportWantedChecked, ImportLocation, v)]
|
||||
-> MkTreeHandle
|
||||
-> Annex Sha
|
||||
buildImportTree converttree basetree msubdir ls hdl = do
|
||||
importtree <- liftIO . recordTree' hdl =<< converttree msubdir ls
|
||||
importtree <- liftIO . recordTree' hdl
|
||||
=<< converttree msubdir (map (\(_, loc, v) -> (loc, v)) ls)
|
||||
graftImportTree basetree msubdir importtree hdl
|
||||
|
||||
graftImportTree
|
||||
|
@ -545,7 +546,7 @@ importChanges remote importtreeconfig importcontent thirdpartypopulated importab
|
|||
let (removed, changed) = partition isremoval diff
|
||||
let mkicchanged ti = do
|
||||
v <- M.lookup (Git.DiffTree.dstsha ti) cidtreemap
|
||||
return (mkloc ti, v)
|
||||
return (ImportWantedChecked False, mkloc ti, v)
|
||||
let ic = ImportableContentsComplete $ ImportableContents
|
||||
{ importableContents = mapMaybe mkicchanged changed
|
||||
, importableHistory = []
|
||||
|
@ -576,10 +577,10 @@ importChanges remote importtreeconfig importcontent thirdpartypopulated importab
|
|||
}
|
||||
where
|
||||
diffchanged = map
|
||||
(\(loc, v) -> (loc, DiffChanged v))
|
||||
(\(wantedchecked, loc, v) -> (wantedchecked, loc, DiffChanged v))
|
||||
(importableContents ic)
|
||||
diffremoved = map
|
||||
(\ti -> (mkloc ti, DiffRemoved))
|
||||
(\ti -> (ImportWantedChecked False, mkloc ti, DiffRemoved))
|
||||
removed
|
||||
|
||||
{- Gets the tree that was last imported from the remote
|
||||
|
@ -671,7 +672,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
|||
then return Nothing
|
||||
else gohistory largematcher cidmap importing db h >>= return . \case
|
||||
Nothing -> Nothing
|
||||
Just h' -> Just $ ImportableContents (catMaybes l') h'
|
||||
Just h' -> Just $ ImportableContents (map (\(loc, k) -> (ImportWantedChecked False, loc, k)) $ catMaybes l') h'
|
||||
|
||||
gohistory largematcher cidmap importing db h = do
|
||||
h' <- mapM (go True largematcher cidmap importing db) h
|
||||
|
@ -689,7 +690,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
|||
| otherwise = do
|
||||
l <- forM (importableContentsSubTree c) $ \(loc, i) -> do
|
||||
let loc' = importableContentsChunkFullLocation (importableContentsSubDir c) loc
|
||||
thirdpartypopulatedimport db (loc', i) >>= return . \case
|
||||
thirdpartypopulatedimport db (ImportWantedChecked False, loc', i) >>= return . \case
|
||||
Just (_loc, k) -> Just (loc, k)
|
||||
Nothing -> Nothing
|
||||
return $ ImportableContentsChunk
|
||||
|
@ -713,7 +714,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
|||
s <- readTVar importing
|
||||
writeTVar importing $ S.delete cid s
|
||||
|
||||
startimport cidmap importing db i@(loc, (cid, _sz)) oldversion largematcher = getcidkey cidmap db cid >>= \case
|
||||
startimport cidmap importing db i@(_, loc, (cid, _sz)) oldversion largematcher = getcidkey cidmap db cid >>= \case
|
||||
(k:ks) ->
|
||||
-- If the same content was imported before
|
||||
-- yielding multiple different keys, it's not clear
|
||||
|
@ -748,7 +749,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
|||
importaction
|
||||
return (Right job)
|
||||
|
||||
thirdpartypopulatedimport db (loc, (cid, sz)) =
|
||||
thirdpartypopulatedimport db (_, loc, (cid, sz)) =
|
||||
case Remote.importKey ia of
|
||||
Nothing -> return Nothing
|
||||
Just importkey ->
|
||||
|
@ -762,7 +763,7 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
|||
warning (UnquotedString (show e))
|
||||
return Nothing
|
||||
|
||||
importordownload cidmap (loc, (cid, sz)) largematcher = do
|
||||
importordownload cidmap (_, loc, (cid, sz)) largematcher = do
|
||||
f <- locworktreefile loc
|
||||
matcher <- largematcher f
|
||||
-- When importing a key is supported, always use it rather
|
||||
|
@ -1024,18 +1025,17 @@ pruneImportMatcher = Utility.Matcher.pruneMatcher matchNeedsKey
|
|||
-}
|
||||
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
getImportableContents r importtreeconfig ci matcher = do
|
||||
Remote.listImportableContents (Remote.importActions r) >>= \case
|
||||
Just (ImportableContentsComplete ic) -> do
|
||||
dbhandle <- opendbhandle
|
||||
dbhandle <- opendbhandle
|
||||
Remote.listImportableContents (Remote.importActions r) (wanted dbhandle) >>= \case
|
||||
Just (ImportableContentsComplete ic) ->
|
||||
Just . ImportableContentsComplete
|
||||
<$> filterunwanted dbhandle ic
|
||||
Just (c@(ImportableContentsChunked {})) -> do
|
||||
dbhandle <- opendbhandle
|
||||
Just (c@(ImportableContentsChunked {})) ->
|
||||
Just <$> filterunwantedchunked dbhandle c
|
||||
Nothing -> return Nothing
|
||||
where
|
||||
filterunwanted dbhandle ic = ImportableContents
|
||||
<$> filterM (wanted dbhandle) (importableContents ic)
|
||||
<$> filterM (wanted' dbhandle) (importableContents ic)
|
||||
<*> mapM (filterunwanted dbhandle) (importableHistory ic)
|
||||
|
||||
filterunwantedchunked dbhandle c = ImportableContentsChunked
|
||||
|
@ -1057,7 +1057,11 @@ getImportableContents r importtreeconfig ci matcher = do
|
|||
void $ Export.updateExportTreeFromLog h
|
||||
return h
|
||||
|
||||
wanted dbhandle (loc, (_cid, sz))
|
||||
wanted dbhandle loc sz = ImportWantedChecked
|
||||
<$> wanted' dbhandle (ImportWantedChecked False, loc, ((), sz))
|
||||
|
||||
wanted' dbhandle (ImportWantedChecked alreadychecked, loc, (_cid, sz))
|
||||
| alreadychecked = pure True
|
||||
| ingitdir = pure False
|
||||
| otherwise =
|
||||
isknown <||> (matches <&&> notignored)
|
||||
|
@ -1069,7 +1073,7 @@ getImportableContents r importtreeconfig ci matcher = do
|
|||
notignored = notIgnoredImportLocation importtreeconfig ci loc
|
||||
|
||||
wantedunder dbhandle root (loc, v) =
|
||||
wanted dbhandle (importableContentsChunkFullLocation root loc, v)
|
||||
wanted' dbhandle (ImportWantedChecked False, importableContentsChunkFullLocation root loc, v)
|
||||
|
||||
isKnownImportLocation :: Export.ExportHandle -> ImportLocation -> Annex Bool
|
||||
isKnownImportLocation dbhandle loc = liftIO $
|
||||
|
|
|
@ -294,8 +294,13 @@ renameExportM serial adir _k old new = do
|
|||
, File newloc
|
||||
]
|
||||
|
||||
listImportableContentsM :: AndroidSerial -> AndroidPath -> ParsedRemoteConfig -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM serial adir c = adbfind >>= \case
|
||||
listImportableContentsM
|
||||
:: AndroidSerial
|
||||
-> AndroidPath
|
||||
-> ParsedRemoteConfig
|
||||
-> ImportWantedChecker Annex
|
||||
-> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM serial adir c _wanted = adbfind >>= \case
|
||||
Just ls -> return $ Just $ ImportableContentsComplete $
|
||||
ImportableContents (mapMaybe mk ls) []
|
||||
Nothing -> giveup "adb find failed"
|
||||
|
@ -344,7 +349,7 @@ listImportableContentsM serial adir c = adbfind >>= \case
|
|||
cid = ContentIdentifier (encodeBS stat)
|
||||
loc = mkImportLocation $ toRawFilePath $
|
||||
Posix.makeRelative (fromAndroidPath adir) fn
|
||||
in Just (loc, (cid, sz))
|
||||
in Just (ImportWantedChecked False, loc, (cid, sz))
|
||||
mk _ = Nothing
|
||||
|
||||
-- This does not guard against every possible race. As long as the adb
|
||||
|
|
|
@ -168,8 +168,13 @@ checkAvailability :: BorgRepo -> Annex Availability
|
|||
checkAvailability borgrepo@(BorgRepo r) =
|
||||
checkPathAvailability (borgLocal borgrepo) r
|
||||
|
||||
listImportableContentsM :: UUID -> BorgRepo -> ParsedRemoteConfig -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM u borgrepo c = prompt $ do
|
||||
listImportableContentsM
|
||||
:: UUID
|
||||
-> BorgRepo
|
||||
-> ParsedRemoteConfig
|
||||
-> ImportWantedChecker Annex
|
||||
-> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM u borgrepo c _wanted = prompt $ do
|
||||
imported <- getImported u
|
||||
ls <- withborglist (locBorgRepo borgrepo) Nothing formatarchivelist $ \as ->
|
||||
forM (filter (not . S.null) as) $ \archivename ->
|
||||
|
|
|
@ -382,21 +382,28 @@ removeExportLocation topdir loc =
|
|||
mkExportLocation loc'
|
||||
in go (upFrom loc') =<< tryIO (removeDirectory p)
|
||||
|
||||
listImportableContentsM :: IgnoreInodes -> RawFilePath -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM ii dir = liftIO $ do
|
||||
l <- dirContentsRecursiveSkipping (const False) False (fromRawFilePath dir)
|
||||
listImportableContentsM
|
||||
:: IgnoreInodes
|
||||
-> RawFilePath
|
||||
-> ImportWantedChecker Annex
|
||||
-> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM ii dir wanted = do
|
||||
l <- liftIO $ dirContentsRecursiveSkipping (const False) False (fromRawFilePath dir)
|
||||
l' <- mapM (go . toRawFilePath) l
|
||||
return $ Just $ ImportableContentsComplete $
|
||||
ImportableContents (catMaybes l') []
|
||||
where
|
||||
go f = do
|
||||
st <- R.getSymbolicLinkStatus f
|
||||
mkContentIdentifier ii f st >>= \case
|
||||
Nothing -> return Nothing
|
||||
Just cid -> do
|
||||
relf <- relPathDirToFile dir f
|
||||
sz <- getFileSize' f st
|
||||
return $ Just (mkImportLocation relf, (cid, sz))
|
||||
relf <- liftIO $ relPathDirToFile dir f
|
||||
st <- liftIO $ R.getSymbolicLinkStatus f
|
||||
sz <- liftIO $ getFileSize' f st
|
||||
let loc = mkImportLocation relf
|
||||
wanted loc sz >>= \case
|
||||
ImportWantedChecked False -> return Nothing
|
||||
checked@(ImportWantedChecked True) ->
|
||||
liftIO (mkContentIdentifier ii f st) >>= return . \case
|
||||
Nothing -> Nothing
|
||||
Just cid -> Just (checked, loc, (cid, sz))
|
||||
|
||||
newtype IgnoreInodes = IgnoreInodes Bool
|
||||
|
||||
|
|
48
Remote/S3.hs
48
Remote/S3.hs
|
@ -34,6 +34,7 @@ import Network.HTTP.Types
|
|||
import Network.URI
|
||||
import Control.Monad.Trans.Resource
|
||||
import Control.Monad.Catch
|
||||
import Control.Monad.Trans
|
||||
import Control.Concurrent.STM (atomically)
|
||||
import Control.Concurrent.STM.TVar
|
||||
import Data.Maybe
|
||||
|
@ -568,43 +569,68 @@ renameExportS3 hv r rs info k src dest = Just <$> go
|
|||
srcobject = T.pack $ bucketExportLocation info src
|
||||
dstobject = T.pack $ bucketExportLocation info dest
|
||||
|
||||
listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> ParsedRemoteConfig -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsS3 hv r info c =
|
||||
listImportableContentsS3
|
||||
:: S3HandleVar
|
||||
-> Remote
|
||||
-> S3Info
|
||||
-> ParsedRemoteConfig
|
||||
-> ImportWantedChecker Annex
|
||||
-> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsS3 hv r info c wanted =
|
||||
withS3Handle hv $ \case
|
||||
Right h -> Just <$> go h
|
||||
Left p -> giveupS3HandleProblem p (uuid r)
|
||||
where
|
||||
go :: S3Handle -> Annex (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize))
|
||||
go h = do
|
||||
ic <- liftIO $ runResourceT $ extractFromResourceT =<< startlist h
|
||||
ic <- runResourceT $ extractFromResourceT =<< startlist h
|
||||
return (ImportableContentsComplete ic)
|
||||
|
||||
fileprefix = T.pack <$> getRemoteConfigValue fileprefixField c
|
||||
|
||||
startlist :: S3Handle -> ResourceT Annex (ImportableContents (ContentIdentifier, ByteSize))
|
||||
startlist h
|
||||
| versioning info = do
|
||||
rsp <- sendS3Handle h $
|
||||
rsp <- lift $ sendS3Handle h $
|
||||
S3.getBucketObjectVersions (bucket info)
|
||||
continuelistversioned h [] rsp
|
||||
| otherwise = do
|
||||
rsp <- sendS3Handle h $
|
||||
rsp <- lift $ sendS3Handle h $
|
||||
(S3.getBucket (bucket info))
|
||||
{ S3.gbPrefix = fileprefix }
|
||||
continuelistunversioned h [] rsp
|
||||
|
||||
continuelistunversioned :: S3Handle -> [S3.GetBucketResponse] -> S3.GetBucketResponse -> ResourceT Annex (ImportableContents (ContentIdentifier, ByteSize))
|
||||
continuelistunversioned h l rsp
|
||||
| S3.gbrIsTruncated rsp = do
|
||||
rsp' <- sendS3Handle h $
|
||||
rsp' <- lift $ sendS3Handle h $
|
||||
(S3.getBucket (bucket info))
|
||||
{ S3.gbMarker = S3.gbrNextMarker rsp
|
||||
, S3.gbPrefix = fileprefix
|
||||
}
|
||||
-- wantedrsp <- filterwanted rsp
|
||||
continuelistunversioned h (rsp:l) rsp'
|
||||
| otherwise = return $
|
||||
mkImportableContentsUnversioned info (reverse (rsp:l))
|
||||
| otherwise = do
|
||||
--wantedrsp <- filterwanted rsp
|
||||
return $
|
||||
mkImportableContentsUnversioned info (reverse (rsp:l))
|
||||
|
||||
filterwanted rsp = filterwanted' [] (S3.gbrContents rsp)
|
||||
filterwanted' c [] = pure c
|
||||
filterwanted' c (oi:ois) =
|
||||
case bucketImportLocation info $ T.unpack $ S3.objectKey oi of
|
||||
Nothing -> filterwanted' c ois
|
||||
Just loc -> do
|
||||
let sz = S3.objectSize oi
|
||||
let cid = mkS3UnversionedContentIdentifier $ S3.objectETag oi
|
||||
wanted loc sz >>= \case
|
||||
ImportWantedChecked False -> filterwanted' c ois
|
||||
checked@(ImportWantedChecked True) ->
|
||||
filterwanted' ((checked, loc, (cid, sz)):c) ois
|
||||
|
||||
continuelistversioned h l rsp
|
||||
| S3.gbovrIsTruncated rsp = do
|
||||
rsp' <- sendS3Handle h $
|
||||
rsp' <- lift $ sendS3Handle h $
|
||||
(S3.getBucketObjectVersions (bucket info))
|
||||
{ S3.gbovKeyMarker = S3.gbovrNextKeyMarker rsp
|
||||
, S3.gbovVersionIdMarker = S3.gbovrNextVersionIdMarker rsp
|
||||
|
@ -625,7 +651,7 @@ mkImportableContentsUnversioned info l = ImportableContents
|
|||
T.unpack $ S3.objectKey oi
|
||||
let sz = S3.objectSize oi
|
||||
let cid = mkS3UnversionedContentIdentifier $ S3.objectETag oi
|
||||
return (loc, (cid, sz))
|
||||
return (ImportWantedChecked False, loc, (cid, sz))
|
||||
|
||||
mkImportableContentsVersioned :: S3Info -> [S3.GetBucketObjectVersionsResponse] -> ImportableContents (ContentIdentifier, ByteSize)
|
||||
mkImportableContentsVersioned info = build . groupfiles
|
||||
|
@ -645,7 +671,7 @@ mkImportableContentsVersioned info = build . groupfiles
|
|||
T.unpack $ S3.oviKey ovi
|
||||
let sz = S3.oviSize ovi
|
||||
let cid = mkS3VersionedContentIdentifier' ovi
|
||||
return (loc, (cid, sz))
|
||||
return (ImportWantedChecked False, loc, (cid, sz))
|
||||
extract (S3.DeleteMarker {}) = Nothing
|
||||
|
||||
-- group files so all versions of a file are in a sublist,
|
||||
|
|
|
@ -18,6 +18,7 @@ import qualified System.FilePath.Posix.ByteString as Posix
|
|||
import Types.Export
|
||||
import Utility.QuickCheck
|
||||
import Utility.FileSystemEncoding
|
||||
import Utility.DataUnits
|
||||
|
||||
{- Location of content on a remote that can be imported.
|
||||
- This is just an alias to ExportLocation, because both are referring to a
|
||||
|
@ -55,7 +56,7 @@ instance Arbitrary ContentIdentifier where
|
|||
{- List of files that can be imported from a remote, each with some added
|
||||
- information. -}
|
||||
data ImportableContents info = ImportableContents
|
||||
{ importableContents :: [(ImportLocation, info)]
|
||||
{ importableContents :: [(ImportWantedChecked, ImportLocation, info)]
|
||||
, importableHistory :: [ImportableContents info]
|
||||
-- ^ Used by remotes that support importing historical versions of
|
||||
-- files that are stored in them. This is equivalent to a git
|
||||
|
@ -103,3 +104,17 @@ importableContentsChunkFullLocation
|
|||
-> ImportLocation
|
||||
importableContentsChunkFullLocation (ImportChunkSubDir root) loc =
|
||||
mkImportLocation $ Posix.combine root loc
|
||||
|
||||
newtype ImportWantedChecked = ImportWantedChecked Bool
|
||||
deriving (Show, Generic)
|
||||
|
||||
instance NFData ImportWantedChecked
|
||||
|
||||
{- This action may optionally be used to check if a file will be wanted in
|
||||
- the import when constructing the ImportableContents.
|
||||
- Filtering out unwanted files at that point makes the import use less
|
||||
- memory, but is optional. If ImportWantedChecked False is used instead,
|
||||
- it will be checked in a later pass.
|
||||
-}
|
||||
type ImportWantedChecker a = ImportLocation -> ByteSize -> a ImportWantedChecked
|
||||
|
||||
|
|
|
@ -314,7 +314,7 @@ data ImportActions a = ImportActions
|
|||
--
|
||||
-- Throws exception on failure to access the remote.
|
||||
-- May return Nothing when the remote is unchanged since last time.
|
||||
{ listImportableContents :: a (Maybe (ImportableContentsChunkable a (ContentIdentifier, ByteSize)))
|
||||
{ listImportableContents :: ImportWantedChecker a -> a (Maybe (ImportableContentsChunkable a (ContentIdentifier, ByteSize)))
|
||||
-- Generates a Key (of any type) for the file stored on the
|
||||
-- remote at the ImportLocation. Does not download the file
|
||||
-- from the remote.
|
||||
|
|
Loading…
Reference in a new issue