avoid renaming to temp files before deleting

Only rename when actually ncessary.

The diff gets buffered in memory. Probably git has to buffer a diff in
memory when generating it as well, so this memory usage should not be a
problem, even when the diff is very large. I hope.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-09-07 14:32:47 -04:00
parent 16eb2f976c
commit a48b52c056
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 61 additions and 32 deletions

View file

@ -5,6 +5,8 @@
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE TupleSections #-}
module Command.Export where
import Command
@ -26,6 +28,7 @@ import Messages.Progress
import Utility.Tmp
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
cmd :: Command
cmd = command "export" SectionCommon
@ -49,7 +52,7 @@ optParser _ = ExportOptions
-- An export includes both annexed files and files stored in git.
-- For the latter, a SHA1 key is synthesized.
data ExportKey = AnnexKey Key | GitKey Key
deriving (Show)
deriving (Show, Eq, Ord)
asKey :: ExportKey -> Key
asKey (AnnexKey k) = k
@ -103,17 +106,22 @@ seek o = do
case map exportedTreeish old of
[] -> return ()
[oldtreesha] -> do
-- Rename all old files to temp.
mapdiff
(\diff -> startMoveToTempName r db (Git.DiffTree.file diff) (Git.DiffTree.srcsha diff))
oldtreesha new
diffmap <- mkDiffMap oldtreesha new
let seekdiffmap a = seekActions $ pure $ map a (M.toList diffmap)
-- Rename old files to temp, or delete.
seekdiffmap $ \(ek, (moldf, mnewf)) ->
case (moldf, mnewf) of
(Just oldf, Just _newf) ->
startMoveToTempName r db oldf ek
(Just oldf, Nothing) ->
startUnexport' r db oldf ek
_ -> stop
-- Rename from temp to new files.
mapdiff (\diff -> startMoveFromTempName r db (Git.DiffTree.dstsha diff) (Git.DiffTree.file diff))
oldtreesha new
-- Remove all remaining temps.
mapdiff
(startUnexportTempName r db . Git.DiffTree.srcsha)
oldtreesha new
seekdiffmap $ \(ek, (moldf, mnewf)) ->
case (moldf, mnewf) of
(Just _oldf, Just newf) ->
startMoveFromTempName r db ek newf
_ -> stop
ts -> do
warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
forM_ ts $ \oldtreesha -> do
@ -126,7 +134,7 @@ seek o = do
, Git.DiffTree.dstsha d
]
-- Don't rename to temp, because the
-- content is unknown; unexport instead.
-- content is unknown; delete instead.
mapdiff
(\diff -> startUnexport r db (Git.DiffTree.file diff) (unexportboth diff))
oldtreesha new
@ -152,6 +160,28 @@ seek o = do
seekActions $ pure $ map a diff
void $ liftIO cleanup
-- Map of old and new filenames for each changed ExportKey in a diff.
type DiffMap = M.Map ExportKey (Maybe TopFilePath, Maybe TopFilePath)
mkDiffMap :: Git.Ref -> Git.Ref -> Annex DiffMap
mkDiffMap old new = do
(diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive old new
diffmap <- M.fromListWith combinedm . concat <$> forM diff mkdm
void $ liftIO cleanup
return diffmap
where
combinedm (srca, dsta) (srcb, dstb) = (srca <|> srcb, dsta <|> dstb)
mkdm i = do
srcek <- getk (Git.DiffTree.srcsha i)
dstek <- getk (Git.DiffTree.dstsha i)
return $ catMaybes
[ (, (Just (Git.DiffTree.file i), Nothing)) <$> srcek
, (, (Nothing, Just (Git.DiffTree.file i))) <$> dstek
]
getk sha
| sha == nullSha = return Nothing
| otherwise = Just <$> exportKey sha
startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart
startExport r db ti = do
ek <- exportKey (Git.LsTree.sha ti)
@ -204,6 +234,14 @@ startUnexport r db f shas = do
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
startUnexport' :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
startUnexport' r db f ek = do
showStart "unexport" f'
next $ performUnexport r db [ek] loc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
performUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandPerform
performUnexport r db eks loc = do
ifM (allM (\ek -> removeExport (exportActions r) (asKey ek) loc) eks)
@ -236,27 +274,21 @@ startUnexportTempName r db sha
showStart "unexport" f
next $ performUnexport r db [ek] loc
startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> Git.Sha -> CommandStart
startMoveToTempName r db f sha
| sha == nullSha = stop
| otherwise = do
ek <- exportKey sha
let tmploc@(ExportLocation tmpf) = exportTempName ek
showStart "rename" (f' ++ " -> " ++ tmpf)
next $ performRename r db ek loc tmploc
startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
startMoveToTempName r db f ek = do
let tmploc@(ExportLocation tmpf) = exportTempName ek
showStart "rename" (f' ++ " -> " ++ tmpf)
next $ performRename r db ek loc tmploc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
startMoveFromTempName :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart
startMoveFromTempName r db sha f
| sha == nullSha = stop
| otherwise = do
ek <- exportKey sha
let tmploc@(ExportLocation tmpf) = exportTempName ek
stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do
showStart "rename" (tmpf ++ " -> " ++ f')
next $ performRename r db ek tmploc loc
startMoveFromTempName :: Remote -> ExportHandle -> ExportKey -> TopFilePath -> CommandStart
startMoveFromTempName r db ek f = do
let tmploc@(ExportLocation tmpf) = exportTempName ek
stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do
showStart "rename" (tmpf ++ " -> " ++ f')
next $ performRename r db ek tmploc loc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f

View file

@ -24,8 +24,5 @@ Work is in progress. Todo list:
export from another repository also doesn't work right, because the
export database is not populated. So, seems that the export database needs
to get populated based on the export log in these cases.
* Currently all modified/deleted files are renamed to temp files,
even when they won't be used. Avoid doing this unless the
temp file will be renamed to the new filename.
* Support export to aditional special remotes (S3 etc)
* Support export to external special remotes.