avoid renaming to temp files before deleting

Only rename when actually ncessary.

The diff gets buffered in memory. Probably git has to buffer a diff in
memory when generating it as well, so this memory usage should not be a
problem, even when the diff is very large. I hope.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-09-07 14:32:47 -04:00
parent 16eb2f976c
commit a48b52c056
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 61 additions and 32 deletions

View file

@ -5,6 +5,8 @@
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
{-# LANGUAGE TupleSections #-}
module Command.Export where module Command.Export where
import Command import Command
@ -26,6 +28,7 @@ import Messages.Progress
import Utility.Tmp import Utility.Tmp
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
cmd :: Command cmd :: Command
cmd = command "export" SectionCommon cmd = command "export" SectionCommon
@ -49,7 +52,7 @@ optParser _ = ExportOptions
-- An export includes both annexed files and files stored in git. -- An export includes both annexed files and files stored in git.
-- For the latter, a SHA1 key is synthesized. -- For the latter, a SHA1 key is synthesized.
data ExportKey = AnnexKey Key | GitKey Key data ExportKey = AnnexKey Key | GitKey Key
deriving (Show) deriving (Show, Eq, Ord)
asKey :: ExportKey -> Key asKey :: ExportKey -> Key
asKey (AnnexKey k) = k asKey (AnnexKey k) = k
@ -103,17 +106,22 @@ seek o = do
case map exportedTreeish old of case map exportedTreeish old of
[] -> return () [] -> return ()
[oldtreesha] -> do [oldtreesha] -> do
-- Rename all old files to temp. diffmap <- mkDiffMap oldtreesha new
mapdiff let seekdiffmap a = seekActions $ pure $ map a (M.toList diffmap)
(\diff -> startMoveToTempName r db (Git.DiffTree.file diff) (Git.DiffTree.srcsha diff)) -- Rename old files to temp, or delete.
oldtreesha new seekdiffmap $ \(ek, (moldf, mnewf)) ->
case (moldf, mnewf) of
(Just oldf, Just _newf) ->
startMoveToTempName r db oldf ek
(Just oldf, Nothing) ->
startUnexport' r db oldf ek
_ -> stop
-- Rename from temp to new files. -- Rename from temp to new files.
mapdiff (\diff -> startMoveFromTempName r db (Git.DiffTree.dstsha diff) (Git.DiffTree.file diff)) seekdiffmap $ \(ek, (moldf, mnewf)) ->
oldtreesha new case (moldf, mnewf) of
-- Remove all remaining temps. (Just _oldf, Just newf) ->
mapdiff startMoveFromTempName r db ek newf
(startUnexportTempName r db . Git.DiffTree.srcsha) _ -> stop
oldtreesha new
ts -> do ts -> do
warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.." warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
forM_ ts $ \oldtreesha -> do forM_ ts $ \oldtreesha -> do
@ -126,7 +134,7 @@ seek o = do
, Git.DiffTree.dstsha d , Git.DiffTree.dstsha d
] ]
-- Don't rename to temp, because the -- Don't rename to temp, because the
-- content is unknown; unexport instead. -- content is unknown; delete instead.
mapdiff mapdiff
(\diff -> startUnexport r db (Git.DiffTree.file diff) (unexportboth diff)) (\diff -> startUnexport r db (Git.DiffTree.file diff) (unexportboth diff))
oldtreesha new oldtreesha new
@ -152,6 +160,28 @@ seek o = do
seekActions $ pure $ map a diff seekActions $ pure $ map a diff
void $ liftIO cleanup void $ liftIO cleanup
-- Map of old and new filenames for each changed ExportKey in a diff.
type DiffMap = M.Map ExportKey (Maybe TopFilePath, Maybe TopFilePath)
mkDiffMap :: Git.Ref -> Git.Ref -> Annex DiffMap
mkDiffMap old new = do
(diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive old new
diffmap <- M.fromListWith combinedm . concat <$> forM diff mkdm
void $ liftIO cleanup
return diffmap
where
combinedm (srca, dsta) (srcb, dstb) = (srca <|> srcb, dsta <|> dstb)
mkdm i = do
srcek <- getk (Git.DiffTree.srcsha i)
dstek <- getk (Git.DiffTree.dstsha i)
return $ catMaybes
[ (, (Just (Git.DiffTree.file i), Nothing)) <$> srcek
, (, (Nothing, Just (Git.DiffTree.file i))) <$> dstek
]
getk sha
| sha == nullSha = return Nothing
| otherwise = Just <$> exportKey sha
startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart
startExport r db ti = do startExport r db ti = do
ek <- exportKey (Git.LsTree.sha ti) ek <- exportKey (Git.LsTree.sha ti)
@ -204,6 +234,14 @@ startUnexport r db f shas = do
loc = ExportLocation $ toInternalGitPath f' loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f f' = getTopFilePath f
startUnexport' :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
startUnexport' r db f ek = do
showStart "unexport" f'
next $ performUnexport r db [ek] loc
where
loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f
performUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandPerform performUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandPerform
performUnexport r db eks loc = do performUnexport r db eks loc = do
ifM (allM (\ek -> removeExport (exportActions r) (asKey ek) loc) eks) ifM (allM (\ek -> removeExport (exportActions r) (asKey ek) loc) eks)
@ -236,11 +274,8 @@ startUnexportTempName r db sha
showStart "unexport" f showStart "unexport" f
next $ performUnexport r db [ek] loc next $ performUnexport r db [ek] loc
startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> Git.Sha -> CommandStart startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart
startMoveToTempName r db f sha startMoveToTempName r db f ek = do
| sha == nullSha = stop
| otherwise = do
ek <- exportKey sha
let tmploc@(ExportLocation tmpf) = exportTempName ek let tmploc@(ExportLocation tmpf) = exportTempName ek
showStart "rename" (f' ++ " -> " ++ tmpf) showStart "rename" (f' ++ " -> " ++ tmpf)
next $ performRename r db ek loc tmploc next $ performRename r db ek loc tmploc
@ -248,11 +283,8 @@ startMoveToTempName r db f sha
loc = ExportLocation $ toInternalGitPath f' loc = ExportLocation $ toInternalGitPath f'
f' = getTopFilePath f f' = getTopFilePath f
startMoveFromTempName :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart startMoveFromTempName :: Remote -> ExportHandle -> ExportKey -> TopFilePath -> CommandStart
startMoveFromTempName r db sha f startMoveFromTempName r db ek f = do
| sha == nullSha = stop
| otherwise = do
ek <- exportKey sha
let tmploc@(ExportLocation tmpf) = exportTempName ek let tmploc@(ExportLocation tmpf) = exportTempName ek
stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do
showStart "rename" (tmpf ++ " -> " ++ f') showStart "rename" (tmpf ++ " -> " ++ f')

View file

@ -24,8 +24,5 @@ Work is in progress. Todo list:
export from another repository also doesn't work right, because the export from another repository also doesn't work right, because the
export database is not populated. So, seems that the export database needs export database is not populated. So, seems that the export database needs
to get populated based on the export log in these cases. to get populated based on the export log in these cases.
* Currently all modified/deleted files are renamed to temp files,
even when they won't be used. Avoid doing this unless the
temp file will be renamed to the new filename.
* Support export to aditional special remotes (S3 etc) * Support export to aditional special remotes (S3 etc)
* Support export to external special remotes. * Support export to external special remotes.