optimisation for borg
Skip needing to list importable contents when unchanged since last time.
This commit is contained in:
		
					parent
					
						
							
								e1ac42be77
							
						
					
				
			
			
				commit
				
					
						4f9969d0a1
					
				
			
		
					 9 changed files with 32 additions and 21 deletions
				
			
		|  | @ -660,12 +660,15 @@ makeImportMatcher r = load preferredContentKeylessTokens >>= \case | ||||||
|  - would delete the files. |  - would delete the files. | ||||||
|  - |  - | ||||||
|  - Throws exception if unable to contact the remote. |  - Throws exception if unable to contact the remote. | ||||||
|  |  - Returns Nothing when there is no change since last time. | ||||||
|  -} |  -} | ||||||
| getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (ImportableContents (ContentIdentifier, ByteSize)) | getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) | ||||||
| getImportableContents r importtreeconfig ci matcher = do | getImportableContents r importtreeconfig ci matcher = do | ||||||
| 	importable <- Remote.listImportableContents (Remote.importActions r) | 	Remote.listImportableContents (Remote.importActions r) >>= \case | ||||||
| 	dbhandle <- Export.openDb (Remote.uuid r) | 		Just importable -> do | ||||||
| 	filterunwanted dbhandle importable | 			dbhandle <- Export.openDb (Remote.uuid r) | ||||||
|  | 			Just <$> filterunwanted dbhandle importable | ||||||
|  | 		Nothing -> return Nothing | ||||||
|   where |   where | ||||||
| 	filterunwanted dbhandle ic = ImportableContents | 	filterunwanted dbhandle ic = ImportableContents | ||||||
| 		<$> filterM (wanted dbhandle) (importableContents ic) | 		<$> filterM (wanted dbhandle) (importableContents ic) | ||||||
|  |  | ||||||
|  | @ -325,13 +325,13 @@ seekRemote remote branch msubdir importcontent ci = do | ||||||
| listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize))) -> CommandStart | listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize))) -> CommandStart | ||||||
| listContents remote importtreeconfig ci tvar = starting "list" ai si $ | listContents remote importtreeconfig ci tvar = starting "list" ai si $ | ||||||
| 	listContents' remote importtreeconfig ci $ \importable -> do | 	listContents' remote importtreeconfig ci $ \importable -> do | ||||||
| 		liftIO $ atomically $ writeTVar tvar (Just importable) | 		liftIO $ atomically $ writeTVar tvar importable | ||||||
| 		next $ return True | 		next $ return True | ||||||
|   where |   where | ||||||
| 	ai = ActionItemOther (Just (Remote.name remote)) | 	ai = ActionItemOther (Just (Remote.name remote)) | ||||||
| 	si = SeekInput [] | 	si = SeekInput [] | ||||||
| 
 | 
 | ||||||
| listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (ImportableContents (ContentIdentifier, Remote.ByteSize) -> Annex a) -> Annex a | listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a | ||||||
| listContents' remote importtreeconfig ci a =  | listContents' remote importtreeconfig ci a =  | ||||||
| 	makeImportMatcher remote >>= \case | 	makeImportMatcher remote >>= \case | ||||||
| 		Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case | 		Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case | ||||||
|  |  | ||||||
|  | @ -495,13 +495,14 @@ importThirdPartyPopulated remote = | ||||||
| 	void $ includeCommandAction $ starting "list" ai si $ | 	void $ includeCommandAction $ starting "list" ai si $ | ||||||
| 		Command.Import.listContents' remote ImportTree (CheckGitIgnore False) go | 		Command.Import.listContents' remote ImportTree (CheckGitIgnore False) go | ||||||
|   where |   where | ||||||
| 	go importable = importKeys remote ImportTree False True importable >>= \case | 	go (Just importable) = importKeys remote ImportTree False True importable >>= \case | ||||||
| 		Just importablekeys -> do | 		Just importablekeys -> do | ||||||
| 			(_imported, updatestate) <- recordImportTree remote ImportTree importablekeys | 			(_imported, updatestate) <- recordImportTree remote ImportTree importablekeys | ||||||
| 			next $ do | 			next $ do | ||||||
| 				updatestate | 				updatestate | ||||||
| 				return True | 				return True | ||||||
| 		Nothing -> next $ return False | 		Nothing -> next $ return False | ||||||
|  | 	go Nothing = next $ return True -- unchanged from before | ||||||
| 
 | 
 | ||||||
| 	ai = ActionItemOther (Just (Remote.name remote)) | 	ai = ActionItemOther (Just (Remote.name remote)) | ||||||
| 	si = SeekInput [] | 	si = SeekInput [] | ||||||
|  |  | ||||||
|  | @ -286,9 +286,9 @@ renameExportM serial adir _k old new = do | ||||||
| 		, File newloc | 		, File newloc | ||||||
| 		] | 		] | ||||||
| 
 | 
 | ||||||
| listImportableContentsM :: AndroidSerial -> AndroidPath -> Annex (ImportableContents (ContentIdentifier, ByteSize)) | listImportableContentsM :: AndroidSerial -> AndroidPath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) | ||||||
| listImportableContentsM serial adir = adbfind >>= \case | listImportableContentsM serial adir = adbfind >>= \case | ||||||
| 	Just ls -> return $ ImportableContents (mapMaybe mk ls) [] | 	Just ls -> return $ Just $ ImportableContents (mapMaybe mk ls) [] | ||||||
| 	Nothing -> giveup "adb find failed" | 	Nothing -> giveup "adb find failed" | ||||||
|   where |   where | ||||||
| 	adbfind = adbShell serial | 	adbfind = adbShell serial | ||||||
|  |  | ||||||
|  | @ -26,6 +26,7 @@ import Utility.Metered | ||||||
| import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated | import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated | ||||||
| import Logs.Export | import Logs.Export | ||||||
| 
 | 
 | ||||||
|  | import Data.Either | ||||||
| import Text.Read | import Text.Read | ||||||
| import Control.Exception (evaluate) | import Control.Exception (evaluate) | ||||||
| import Control.DeepSeq | import Control.DeepSeq | ||||||
|  | @ -122,8 +123,6 @@ borgSetup _ mu _ c _gc = do | ||||||
| 	-- persistant state, so it can vary between hosts. | 	-- persistant state, so it can vary between hosts. | ||||||
| 	gitConfigSpecialRemote u c [("borgrepo", borgrepo)] | 	gitConfigSpecialRemote u c [("borgrepo", borgrepo)] | ||||||
| 
 | 
 | ||||||
| 	-- TODO: untrusted by default, but allow overriding that |  | ||||||
| 
 |  | ||||||
| 	return (c, u) | 	return (c, u) | ||||||
| 
 | 
 | ||||||
| borgLocal :: BorgRepo -> Bool | borgLocal :: BorgRepo -> Bool | ||||||
|  | @ -132,18 +131,20 @@ borgLocal = notElem ':' | ||||||
| -- XXX the tree generated by using this does not seem to get grafted into | -- XXX the tree generated by using this does not seem to get grafted into | ||||||
| -- the git-annex branch, so would be subject to being lost to GC. | -- the git-annex branch, so would be subject to being lost to GC. | ||||||
| -- Is this a general problem affecting importtree too? | -- Is this a general problem affecting importtree too? | ||||||
| listImportableContentsM :: UUID -> BorgRepo -> Annex (ImportableContents (ContentIdentifier, ByteSize)) | listImportableContentsM :: UUID -> BorgRepo -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) | ||||||
| listImportableContentsM u borgrepo = prompt $ do | listImportableContentsM u borgrepo = prompt $ do | ||||||
| 	imported <- getImported u | 	imported <- getImported u | ||||||
| 	ls <- withborglist borgrepo "{barchive}{NUL}" $ \as -> | 	ls <- withborglist borgrepo "{barchive}{NUL}" $ \as -> | ||||||
| 		forM as $ \archivename -> | 		forM as $ \archivename -> | ||||||
| 			case M.lookup archivename imported of | 			case M.lookup archivename imported of | ||||||
| 				Just getfast -> getfast | 				Just getfast -> return $ Left getfast | ||||||
| 				Nothing ->  | 				Nothing -> Right <$> | ||||||
| 					let archive = borgrepo ++ "::" ++ decodeBS' archivename | 					let archive = borgrepo ++ "::" ++ decodeBS' archivename | ||||||
| 					in withborglist archive "{size}{NUL}{path}{NUL}" $ | 					in withborglist archive "{size}{NUL}{path}{NUL}" $ | ||||||
| 						liftIO . evaluate . force . parsefilelist archivename | 						liftIO . evaluate . force . parsefilelist archivename | ||||||
| 	return $ mkimportablecontents ls | 	if all isLeft ls | ||||||
|  | 		then return Nothing -- unchanged since last time, avoid work | ||||||
|  | 		else Just . mkimportablecontents <$> mapM (either id pure) ls | ||||||
|   where |   where | ||||||
| 	withborglist what format a = do | 	withborglist what format a = do | ||||||
| 		let p = (proc "borg" ["list", what, "--format", format]) | 		let p = (proc "borg" ["list", what, "--format", format]) | ||||||
|  |  | ||||||
|  | @ -337,11 +337,11 @@ removeExportLocation topdir loc = | ||||||
| 			mkExportLocation loc' | 			mkExportLocation loc' | ||||||
| 		in go (upFrom loc') =<< tryIO (removeDirectory p) | 		in go (upFrom loc') =<< tryIO (removeDirectory p) | ||||||
| 
 | 
 | ||||||
| listImportableContentsM :: RawFilePath -> Annex (ImportableContents (ContentIdentifier, ByteSize)) | listImportableContentsM :: RawFilePath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) | ||||||
| listImportableContentsM dir = liftIO $ do | listImportableContentsM dir = liftIO $ do | ||||||
| 	l <- dirContentsRecursive (fromRawFilePath dir) | 	l <- dirContentsRecursive (fromRawFilePath dir) | ||||||
| 	l' <- mapM (go . toRawFilePath) l | 	l' <- mapM (go . toRawFilePath) l | ||||||
| 	return $ ImportableContents (catMaybes l') [] | 	return $ Just $ ImportableContents (catMaybes l') [] | ||||||
|   where |   where | ||||||
| 	go f = do | 	go f = do | ||||||
| 		st <- R.getFileStatus f | 		st <- R.getFileStatus f | ||||||
|  |  | ||||||
|  | @ -550,13 +550,14 @@ renameExportS3 hv r rs info k src dest = Just <$> go | ||||||
| 	srcobject = T.pack $ bucketExportLocation info src | 	srcobject = T.pack $ bucketExportLocation info src | ||||||
| 	dstobject = T.pack $ bucketExportLocation info dest | 	dstobject = T.pack $ bucketExportLocation info dest | ||||||
| 
 | 
 | ||||||
| listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (ImportableContents (ContentIdentifier, ByteSize)) | listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) | ||||||
| listImportableContentsS3 hv r info = | listImportableContentsS3 hv r info = | ||||||
| 	withS3Handle hv $ \case | 	withS3Handle hv $ \case | ||||||
| 		Nothing -> giveup $ needS3Creds (uuid r) | 		Nothing -> giveup $ needS3Creds (uuid r) | ||||||
| 		Just h -> liftIO $ runResourceT $ | 		Just h -> Just <$> go h | ||||||
| 			extractFromResourceT =<< startlist h |  | ||||||
|   where |   where | ||||||
|  | 	go h = liftIO $ runResourceT $ extractFromResourceT =<< startlist h | ||||||
|  | 
 | ||||||
| 	startlist h | 	startlist h | ||||||
| 		| versioning info = do | 		| versioning info = do | ||||||
| 			rsp <- sendS3Handle h $  | 			rsp <- sendS3Handle h $  | ||||||
|  |  | ||||||
|  | @ -283,7 +283,8 @@ data ImportActions a = ImportActions | ||||||
| 	-- remote. | 	-- remote. | ||||||
| 	-- | 	-- | ||||||
| 	-- Throws exception on failure to access the remote. | 	-- Throws exception on failure to access the remote. | ||||||
| 	{ listImportableContents :: a (ImportableContents (ContentIdentifier, ByteSize)) | 	-- May return Nothing when the remote is unchanged since last time. | ||||||
|  | 	{ listImportableContents :: a (Maybe (ImportableContents (ContentIdentifier, ByteSize))) | ||||||
| 	-- Generates a Key (of any type) for the file stored on the | 	-- Generates a Key (of any type) for the file stored on the | ||||||
| 	-- remote at the ImportLocation. Does not download the file | 	-- remote at the ImportLocation. Does not download the file | ||||||
| 	-- from the remote. | 	-- from the remote. | ||||||
|  |  | ||||||
|  | @ -62,3 +62,7 @@ So either keep the borg special remote as untrusted, and use such borg | ||||||
| commands to delete old archives as needed, or avoid using `borg delete` | commands to delete old archives as needed, or avoid using `borg delete` | ||||||
| and `borg prune`, and then the remote can safely be made semitrusted or | and `borg prune`, and then the remote can safely be made semitrusted or | ||||||
| trusted. | trusted. | ||||||
|  | 
 | ||||||
|  | Also, if you do choose to delete old archives, make sure to never reuse | ||||||
|  | that archive name for a new archive. git-annex may think it's the same | ||||||
|  | archive it saw before, and not notice the change. | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Joey Hess
				Joey Hess