avoid renaming to temp files before deleting

Only rename when actually ncessary.

The diff gets buffered in memory. Probably git has to buffer a diff in
memory when generating it as well, so this memory usage should not be a
problem, even when the diff is very large. I hope.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-09-07 14:32:47 -04:00
parent 16eb2f976c
commit a48b52c056
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 61 additions and 32 deletions

View file

@ -5,6 +5,8 @@
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE TupleSections #-}
module Command.Export where
import Command
@ -26,6 +28,7 @@ import Messages.Progress
import Utility.Tmp
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
cmd :: Command
cmd = command "export" SectionCommon
@ -49,7 +52,7 @@ optParser _ = ExportOptions
-- An export includes both annexed files and files stored in git.
-- For the latter, a SHA1 key is synthesized.
data ExportKey = AnnexKey Key | GitKey Key
deriving (Show)
deriving (Show, Eq, Ord)
asKey :: ExportKey -> Key
asKey (AnnexKey k) = k
@ -103,17 +106,22 @@ seek o = do
case map exportedTreeish old of
[] -> return ()
[oldtreesha] -> do
-- Rename all old files to temp.
mapdiff
(\diff -> startMoveToTempName r db (Git.DiffTree.file diff) (Git.DiffTree.srcsha diff))
oldtreesha new
diffmap <- mkDiffMap oldtreesha new
let seekdiffmap a = seekActions $ pure $ map a (M.toList diffmap)
-- Rename old files to temp, or delete.
seekdiffmap $ \(ek, (moldf, mnewf)) ->
case (moldf, mnewf) of
(Just oldf, Just _newf) ->
startMoveToTempName r db oldf ek
(Just oldf, Nothing) ->
startUnexport' r db oldf ek
_ -> stop
-- Rename from temp to new files.
mapdiff (\diff -> startMoveFromTempName r db (Git.DiffTree.dstsha diff) (Git.DiffTree.file diff))
oldtreesha new
-- Remove all remaining temps.
mapdiff
(startUnexportTempName r db . Git.DiffTree.srcsha)
oldtreesha new
seekdiffmap $ \(ek, (moldf, mnewf)) ->
case (moldf, mnewf) of
(Just _oldf, Just newf) ->
startMoveFromTempName r db ek newf
_ -> stop
ts -> do
warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
forM_ ts $ \oldtreesha -> do
@ -126,7 +134,7 @@ seek o = do
, Git.DiffTree.dstsha d
]
-- Don't rename to temp, because the
-- content is unknown; unexport instead.
-- content is unknown; delete instead.
mapdiff
(\diff -> startUnexport r db (Git.DiffTree.file diff) (unexportboth diff))
oldtreesha new
@ -152,6 +160,28 @@ seek o = do
seekActions $ pure $ map a diff
void $ liftIO cleanup
-- Map of old and new filenames for each changed ExportKey in a diff.
type DiffMap = M.Map ExportKey (Maybe TopFilePath, Maybe TopFilePath)
mkDiffMap :: Git.Ref -> Git.Ref -> Annex DiffMap
mkDiffMap old new = do
(diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive old new
diffmap <- M.fromListWith combinedm . concat <$> forM diff mkdm
void $ liftIO cleanup
return diffmap
where
combinedm (srca, dsta) (srcb, dstb) = (srca <|> srcb, dsta <|> dstb)
mkdm i = do
srcek <- getk (Git.DiffTree.srcsha i)
dstek <- getk (Git.DiffTree.dstsha i)
return $ catMaybes
[ (, (Just (Git.DiffTree.file i), Nothing)) <$> srcek
, (, (Nothing, Just (Git.DiffTree.file i))) <$> dstek
]
getk sha
| sha == nullSha = return Nothing
| otherwise = Just <$> exportKey sha
startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart
startExport r db ti = do
ek <- exportKey (Git.LsTree.sha ti)
@ -204,6 +234,14 @@ startUnexport r db f shas = do
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
startUnexport' :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
startUnexport' r db f ek = do
showStart "unexport" f'
next $ performUnexport r db [ek] loc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
performUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandPerform
performUnexport r db eks loc = do
ifM (allM (\ek -> removeExport (exportActions r) (asKey ek) loc) eks)
@ -236,27 +274,21 @@ startUnexportTempName r db sha
showStart "unexport" f
next $ performUnexport r db [ek] loc
startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> Git.Sha -> CommandStart
startMoveToTempName r db f sha
| sha == nullSha = stop
| otherwise = do
ek <- exportKey sha
let tmploc@(ExportLocation tmpf) = exportTempName ek
showStart "rename" (f' ++ " -> " ++ tmpf)
next $ performRename r db ek loc tmploc
startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
startMoveToTempName r db f ek = do
let tmploc@(ExportLocation tmpf) = exportTempName ek
showStart "rename" (f' ++ " -> " ++ tmpf)
next $ performRename r db ek loc tmploc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
startMoveFromTempName :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart
startMoveFromTempName r db sha f
| sha == nullSha = stop
| otherwise = do
ek <- exportKey sha
let tmploc@(ExportLocation tmpf) = exportTempName ek
stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do
showStart "rename" (tmpf ++ " -> " ++ f')
next $ performRename r db ek tmploc loc
startMoveFromTempName :: Remote -> ExportHandle -> ExportKey -> TopFilePath -> CommandStart
startMoveFromTempName r db ek f = do
let tmploc@(ExportLocation tmpf) = exportTempName ek
stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do
showStart "rename" (tmpf ++ " -> " ++ f')
next $ performRename r db ek tmploc loc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f