update ExportTree table efficiently

Use the same diff and key lookup, except when the whole tree has to be
scanned.

This commit was sponsored by Peter Hogg on Patreon.
This commit is contained in:
Joey Hess 2017-09-18 14:24:42 -04:00
parent b03d77c211
commit 0ad7e36dc1
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 29 additions and 19 deletions

View file

@ -18,7 +18,6 @@ import qualified Git.Ref
import Git.Types import Git.Types
import Git.FilePath import Git.FilePath
import Git.Sha import Git.Sha
import Types.Key
import Types.Remote import Types.Remote
import Types.Export import Types.Export
import Annex.Export import Annex.Export
@ -57,7 +56,7 @@ optParser _ = ExportOptions
-- To handle renames which swap files, the exported file is first renamed -- To handle renames which swap files, the exported file is first renamed
-- to a stable temporary name based on the key. -- to a stable temporary name based on the key.
exportTempName :: ExportKey -> ExportLocation exportTempName :: ExportKey -> ExportLocation
exportTempName ek = ExportLocation $ exportTempName ek = mkExportLocation $
".git-annex-tmp-content-" ++ key2file (asKey (ek)) ".git-annex-tmp-content-" ++ key2file (asKey (ek))
seek :: ExportOptions -> CommandSeek seek :: ExportOptions -> CommandSeek
@ -91,14 +90,16 @@ seek' o r = do
-- changed files in the export. After this, every file that remains -- changed files in the export. After this, every file that remains
-- in the export will have the content from the new treeish. -- in the export will have the content from the new treeish.
-- --
-- (Also, when there was an export conflict, this resolves it.) -- When there was an export conflict, this resolves it.
--
-- The ExportTree is also updated here to reflect the new tree.
case map exportedTreeish old of case map exportedTreeish old of
[] -> return () [] -> updateExportTree db emptyTree new
[oldtreesha] -> do [oldtreesha] -> do
diffmap <- mkDiffMap oldtreesha new diffmap <- mkDiffMap oldtreesha new db
let seekdiffmap a = seekActions $ pure $ map a (M.toList diffmap) let seekdiffmap a = seekActions $ pure $ map a (M.toList diffmap)
-- Rename old files to temp, or delete. -- Rename old files to temp, or delete.
seekdiffmap $ \(ek, (moldf, mnewf)) -> seekdiffmap $ \(ek, (moldf, mnewf)) -> do
case (moldf, mnewf) of case (moldf, mnewf) of
(Just oldf, Just _newf) -> (Just oldf, Just _newf) ->
startMoveToTempName r ea db oldf ek startMoveToTempName r ea db oldf ek
@ -127,13 +128,14 @@ seek' o r = do
mapdiff mapdiff
(\diff -> startUnexport r ea db (Git.DiffTree.file diff) (unexportboth diff)) (\diff -> startUnexport r ea db (Git.DiffTree.file diff) (unexportboth diff))
oldtreesha new oldtreesha new
updateExportTree db emptyTree new
liftIO $ recordDataSource db new
-- Waiting until now to record the export guarantees that, -- Waiting until now to record the export guarantees that,
-- if this export is interrupted, there are no files left over -- if this export is interrupted, there are no files left over
-- from a previous export, that are not part of this export. -- from a previous export, that are not part of this export.
c <- Annex.getState Annex.errcounter c <- Annex.getState Annex.errcounter
when (c == 0) $ do when (c == 0) $ do
liftIO $ recordDataSource db new
recordExport (uuid r) $ ExportChange recordExport (uuid r) $ ExportChange
{ oldTreeish = map exportedTreeish old { oldTreeish = map exportedTreeish old
, newTreeish = new , newTreeish = new
@ -155,8 +157,8 @@ seek' o r = do
-- Map of old and new filenames for each changed ExportKey in a diff. -- Map of old and new filenames for each changed ExportKey in a diff.
type DiffMap = M.Map ExportKey (Maybe TopFilePath, Maybe TopFilePath) type DiffMap = M.Map ExportKey (Maybe TopFilePath, Maybe TopFilePath)
mkDiffMap :: Git.Ref -> Git.Ref -> Annex DiffMap mkDiffMap :: Git.Ref -> Git.Ref -> ExportHandle -> Annex DiffMap
mkDiffMap old new = do mkDiffMap old new db = do
(diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive old new (diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive old new
diffmap <- M.fromListWith combinedm . concat <$> forM diff mkdm diffmap <- M.fromListWith combinedm . concat <$> forM diff mkdm
void $ liftIO cleanup void $ liftIO cleanup
@ -166,6 +168,7 @@ mkDiffMap old new = do
mkdm i = do mkdm i = do
srcek <- getek (Git.DiffTree.srcsha i) srcek <- getek (Git.DiffTree.srcsha i)
dstek <- getek (Git.DiffTree.dstsha i) dstek <- getek (Git.DiffTree.dstsha i)
updateExportTree' db srcek dstek i
return $ catMaybes return $ catMaybes
[ (, (Just (Git.DiffTree.file i), Nothing)) <$> srcek [ (, (Just (Git.DiffTree.file i), Nothing)) <$> srcek
, (, (Nothing, Just (Git.DiffTree.file i))) <$> dstek , (, (Nothing, Just (Git.DiffTree.file i))) <$> dstek
@ -263,7 +266,7 @@ startRecoverIncomplete r ea db sha oldf
| otherwise = do | otherwise = do
ek <- exportKey sha ek <- exportKey sha
let loc = exportTempName ek let loc = exportTempName ek
showStart "unexport" (fromExportLocation f) showStart "unexport" (fromExportLocation loc)
liftIO $ removeExportedLocation db (asKey ek) oldloc liftIO $ removeExportedLocation db (asKey ek) oldloc
next $ performUnexport r ea db [ek] loc next $ performUnexport r ea db [ek] loc
where where
@ -283,7 +286,7 @@ startMoveFromTempName :: Remote -> ExportActions Annex -> ExportHandle -> Export
startMoveFromTempName r ea db ek f = do startMoveFromTempName r ea db ek f = do
let tmploc = exportTempName ek let tmploc = exportTempName ek
stopUnless (liftIO $ elem tmploc <$> getExportedLocation db (asKey ek)) $ do stopUnless (liftIO $ elem tmploc <$> getExportedLocation db (asKey ek)) $ do
showStart "rename" (exportLocation tmploc ++ " -> " ++ f') showStart "rename" (fromExportLocation tmploc ++ " -> " ++ f')
next $ performRename r ea db ek tmploc loc next $ performRename r ea db ek tmploc loc
where where
loc = mkExportLocation f' loc = mkExportLocation f'

View file

@ -22,7 +22,10 @@ module Database.Export (
getExportedLocation, getExportedLocation,
isExportDirectoryEmpty, isExportDirectoryEmpty,
getExportTree, getExportTree,
addExportTree,
removeExportTree,
updateExportTree, updateExportTree,
updateExportTree',
ExportedId, ExportedId,
ExportTreeId, ExportTreeId,
ExportedDirectoryId, ExportedDirectoryId,
@ -183,18 +186,22 @@ updateExportTree h old new = do
(diff, cleanup) <- inRepo $ (diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive old new Git.DiffTree.diffTreeRecursive old new
forM_ diff $ \i -> do forM_ diff $ \i -> do
let loc = mkExportLocation $ getTopFilePath $
Git.DiffTree.file i
srcek <- getek (Git.DiffTree.srcsha i) srcek <- getek (Git.DiffTree.srcsha i)
case srcek of
Nothing -> return ()
Just k -> liftIO $ removeExportTree h (asKey k) loc
dstek <- getek (Git.DiffTree.dstsha i) dstek <- getek (Git.DiffTree.dstsha i)
case dstek of updateExportTree' h srcek dstek i
Nothing -> return ()
Just k -> liftIO $ addExportTree h (asKey k) loc
void $ liftIO cleanup void $ liftIO cleanup
where where
getek sha getek sha
| sha == nullSha = return Nothing | sha == nullSha = return Nothing
| otherwise = Just <$> exportKey sha | otherwise = Just <$> exportKey sha
-- | Applies a single diff-tree item to the ExportTree table.
--
-- When a source key is supplied, its entry for the item's file is removed;
-- when a destination key is supplied, an entry for that file is added.
-- The removal always runs before the addition.
updateExportTree' :: ExportHandle -> Maybe ExportKey -> Maybe ExportKey -> Git.DiffTree.DiffTreeItem -> Annex ()
updateExportTree' h srcek dstek i = do
    onKey removeExportTree srcek
    onKey addExportTree dstek
  where
    -- Export location of the file that the diff item refers to.
    loc = mkExportLocation $ getTopFilePath $ Git.DiffTree.file i
    -- Runs a table action for the given key at loc; a missing key is a no-op.
    onKey _ Nothing = return ()
    onKey action (Just k) = liftIO $ action h (asKey k) loc