From e55e445a36b3c656113c4b20e947a67cd460c496 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Aug 2017 13:00:41 -0400 Subject: [PATCH 01/31] add API for exporting Implemented so far for the directory special remote. Several remotes don't make sense to export to. Regular Git remotes, obviously, do not. Bup remotes almost certianly do not, since bup would need to be used to extract the export; same store for Ddar. Web and Bittorrent are download-only. GCrypt is always encrypted so exporting to it would be pointless. There's probably no point complicating the Hook remotes with exporting at this point. External, S3, Glacier, WebDAV, Rsync, and possibly Tahoe should be modified to support export. Thought about trying to reuse the storeKey/retrieveKeyFile/removeKey interface, rather than adding a new interface. But, it seemed better to keep it separate, to avoid a complicated interface that sometimes encrypts/chunks key/value storage and sometimes users non-key/value storage. Any common parts can be factored out. Note that storeExport is not atomic. doc/design/exporting_trees_to_special_remotes.mdwn has some things in the "resuming exports" section that bear on this decision. Basically, I don't think, at this time, that an atomic storeExport would help with resuming, because exports are not key/value storage, and we can't be sure that a partially uploaded file is the same content we're currently trying to export. Also, note that ExportLocation will always use unix path separators. This is important, because users may export from a mix of windows and unix, and it avoids complicating the API with path conversions, and ensures that in such a mix, they always use the same locations for exports. This commit was sponsored by Bruno BEAUFILS on Patreon. --- Remote/BitTorrent.hs | 5 +++ Remote/Bup.hs | 5 +++ Remote/Ddar.hs | 5 +++ Remote/Directory.hs | 74 +++++++++++++++++++++++++++++++++++++------- Remote/External.hs | 5 +++ Remote/GCrypt.hs | 5 +++ Remote/Git.hs | 5 +++ Remote/Glacier.hs | 5 +++ Remote/Hook.hs | 5 +++ Remote/P2P.hs | 5 +++ Remote/Rsync.hs | 5 +++ Remote/S3.hs | 5 +++ Remote/Tahoe.hs | 5 +++ Remote/Web.hs | 5 +++ Remote/WebDAV.hs | 5 +++ Types/Remote.hs | 26 +++++++++++++++- 16 files changed, 158 insertions(+), 12 deletions(-) diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs index 2f29f5baa4..887a0898e6 100644 --- a/Remote/BitTorrent.hs +++ b/Remote/BitTorrent.hs @@ -61,6 +61,11 @@ gen r _ c gc = , lockContent = Nothing , checkPresent = checkKey , checkPresentCheap = False + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 3a2d67bc89..aad8e6bbad 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -61,6 +61,11 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = bupLocal buprepo + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index 2f8c3b3458..1da3ff4121 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -60,6 +60,11 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = ddarLocal ddarrepo + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 2452c42e29..a371a19517 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -1,6 +1,6 @@ {- A "remote" that is just a filesystem directory. - - - Copyright 2011-2014 Joey Hess + - Copyright 2011-2017 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -58,6 +58,11 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = True + , storeExport = Just $ storeExportDirectory dir + , retrieveExport = Just $ retrieveExportDirectory dir + , removeExport = Just $ removeExportDirectory dir + , checkPresentExport = Just $ checkPresentExportDirectory dir + , renameExport = Just $ renameExportDirectory dir , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -119,16 +124,18 @@ tmpDir d k = addTrailingPathSeparator $ d "tmp" keyFile k {- Check if there is enough free disk space in the remote's directory to - store the key. Note that the unencrypted key size is checked. -} prepareStore :: FilePath -> ChunkConfig -> Preparer Storer -prepareStore d chunkconfig = checkPrepare checker +prepareStore d chunkconfig = checkPrepare (checkDiskSpaceDirectory d) (byteStorer $ store d chunkconfig) where - checker k = do - annexdir <- fromRepo gitAnnexObjectDir - samefilesystem <- liftIO $ catchDefaultIO False $ - (\a b -> deviceID a == deviceID b) - <$> getFileStatus d - <*> getFileStatus annexdir - checkDiskSpace (Just d) k 0 samefilesystem + +checkDiskSpaceDirectory :: FilePath -> Key -> Annex Bool +checkDiskSpaceDirectory d k = do + annexdir <- fromRepo gitAnnexObjectDir + samefilesystem <- liftIO $ catchDefaultIO False $ + (\a b -> deviceID a == deviceID b) + <$> getFileStatus d + <*> getFileStatus annexdir + checkDiskSpace (Just d) k 0 samefilesystem store :: FilePath -> ChunkConfig -> Key -> L.ByteString -> MeterUpdate -> Annex Bool store d chunkconfig k b p = liftIO $ do @@ -211,11 +218,56 @@ removeDirGeneric topdir dir = do checkKey :: FilePath -> ChunkConfig -> CheckPresent checkKey d (LegacyChunks _) k = Legacy.checkKey d locations k -checkKey d _ k = liftIO $ - ifM (anyM doesFileExist (locations d k)) +checkKey d _ k = checkPresentGeneric d (locations d k) + +checkPresentGeneric :: FilePath -> [FilePath] -> Annex Bool +checkPresentGeneric d ps = liftIO $ + ifM (anyM doesFileExist ps) ( return True , ifM (doesDirectoryExist d) ( return False , giveup $ "directory " ++ d ++ " is not accessible" ) ) + +exportPath :: FilePath -> ExportLocation -> FilePath +exportPath d (ExportLocation loc) = d loc + +storeExportDirectory :: FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool +storeExportDirectory d k loc p = sendAnnex k rollback send + where + dest = exportPath d loc + send src = liftIO $ catchBoolIO $ do + createDirectoryIfMissing True dest + withMeteredFile src p (L.writeFile dest) + return True + rollback = liftIO $ nukeFile dest + +retrieveExportDirectory :: FilePath -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex (Bool, Verification) +retrieveExportDirectory d _k loc dest p = unVerified $ liftIO $ catchBoolIO $ do + withMeteredFile src p (L.writeFile dest) + return True + where + src = exportPath d loc + +removeExportDirectory :: FilePath -> Key -> ExportLocation -> Annex Bool +removeExportDirectory d _k loc = liftIO $ do + nukeFile src + void $ tryIO $ removeDirectory $ takeDirectory src + return True + where + src = exportPath d loc + +checkPresentExportDirectory :: FilePath -> Key -> ExportLocation -> Annex Bool +checkPresentExportDirectory d _k loc = + checkPresentGeneric d [exportPath d loc] + +renameExportDirectory :: FilePath -> Key -> ExportLocation -> ExportLocation -> Annex Bool +renameExportDirectory d _k oldloc newloc = liftIO $ catchBoolIO $ do + createDirectoryIfMissing True dest + renameFile src dest + void $ tryIO $ removeDirectory $ takeDirectory src + return True + where + src = exportPath d oldloc + dest = exportPath d newloc diff --git a/Remote/External.hs b/Remote/External.hs index 32b95e9bb3..dd62c1539e 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -85,6 +85,11 @@ gen r u c gc , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = towhereis , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 2ccc47ad89..95b7ae2875 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -114,6 +114,11 @@ gen' r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = repoCheap r + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Git.hs b/Remote/Git.hs index b48b48b529..020cd1c619 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -157,6 +157,11 @@ gen r u c gc , lockContent = Just (lockKey new) , checkPresent = inAnnex new , checkPresentCheap = repoCheap r + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Nothing , remoteFsck = if Git.repoIsUrl r then Nothing diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index c2f9bcf122..be65cecb76 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -57,6 +57,11 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 0ebbf9139a..2a98742426 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -51,6 +51,11 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/P2P.hs b/Remote/P2P.hs index 118262b3ca..d77ac89d8f 100644 --- a/Remote/P2P.hs +++ b/Remote/P2P.hs @@ -57,6 +57,11 @@ chainGen addr r u c gc = do , lockContent = Just (lock u addr connpool) , checkPresent = checkpresent u addr connpool , checkPresentCheap = False + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 4fc55d7259..d40d23baef 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -73,6 +73,11 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/S3.hs b/Remote/S3.hs index c05831b0b3..ce67765955 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -84,6 +84,11 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Just (getWebUrls info) , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs index e4686f2f25..cf65634b04 100644 --- a/Remote/Tahoe.hs +++ b/Remote/Tahoe.hs @@ -75,6 +75,11 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkKey u hdl , checkPresentCheap = False + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Just (getWhereisKey u) , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Web.hs b/Remote/Web.hs index be2f265e08..4d55389ec5 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -50,6 +50,11 @@ gen r _ c gc = , lockContent = Nothing , checkPresent = checkKey , checkPresentCheap = False + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 2c4d24c359..9230a027d3 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -68,6 +68,11 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False + , storeExport = Nothing + , retrieveExport = Nothing + , removeExport = Nothing + , checkPresentExport = Nothing + , renameExport = Nothing , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Types/Remote.hs b/Types/Remote.hs index bd75840b30..d4b76f54f3 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -2,7 +2,7 @@ - - Most things should not need this, using Types instead - - - Copyright 2011-2014 Joey Hess + - Copyright 2011-2017 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -18,6 +18,7 @@ module Types.Remote , Availability(..) , Verification(..) , unVerified + , ExportLocation(..) ) where @@ -69,6 +70,7 @@ data RemoteA a = Remote { name :: RemoteName, -- Remotes have a use cost; higher is more expensive cost :: Cost, + -- Transfers a key's contents from disk to the remote. -- The key should not appear to be present on the remote until -- all of its contents have been transferred. @@ -94,6 +96,23 @@ data RemoteA a = Remote { -- Some remotes can checkPresent without an expensive network -- operation. checkPresentCheap :: Bool, + + -- Exports a key's contents to an ExportLocation. + -- The exported file does not need to be updated atomically. + storeExport :: Maybe (Key -> ExportLocation -> MeterUpdate -> a Bool), + -- Retrieves an exported key to a file. + -- (The MeterUpdate does not need to be used if it writes + -- sequentially to the file.) + retrieveExport :: Maybe (Key -> ExportLocation -> FilePath -> MeterUpdate -> a (Bool, Verification)), + -- Removes an exported key (succeeds if the contents are not present) + removeExport :: Maybe (Key -> ExportLocation -> a Bool), + -- Checks if a key is exported to the remote at the specified + -- ExportLocation. + -- Throws an exception if the remote cannot be accessed. + checkPresentExport :: Maybe (Key -> ExportLocation -> a Bool), + -- Renames an already exported key. + renameExport :: Maybe (Key -> ExportLocation -> ExportLocation -> a Bool), + -- Some remotes can provide additional details for whereis. whereisKey :: Maybe (Key -> a [String]), -- Some remotes can run a fsck operation on the remote, @@ -150,3 +169,8 @@ unVerified :: Monad m => m Bool -> m (Bool, Verification) unVerified a = do ok <- a return (ok, UnVerified) + +-- A location on a remote that a key can be exported to. +-- The FilePath will be relative, and may contain unix-style path +-- separators. +newtype ExportLocation = ExportLocation FilePath From 8f35c6584d93bc49536af1eb404e87b49207e6a5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Aug 2017 13:25:48 -0400 Subject: [PATCH 02/31] documentation for export This commit was sponsored by Ole-Morten Duesund on Patreon. --- doc/git-annex-export.mdwn | 37 ++++++++++++++++++++++++++++++ doc/git-annex-import.mdwn | 2 ++ doc/git-annex.mdwn | 6 +++++ doc/special_remotes/directory.mdwn | 4 ++++ 4 files changed, 49 insertions(+) create mode 100644 doc/git-annex-export.mdwn diff --git a/doc/git-annex-export.mdwn b/doc/git-annex-export.mdwn new file mode 100644 index 0000000000..96a09dc9b6 --- /dev/null +++ b/doc/git-annex-export.mdwn @@ -0,0 +1,37 @@ +# NAME + +git-annex export - export content to a remote + +# SYNOPSIS + +git annex export `treeish --to remote` + +# DESCRIPTION + +Use this command to export a tree of files from a git-annex repository. + +Normally files are stored on a git-annex special remote named by their +keys. That is great for data storage, but your filenames are obscured. +Exporting replicates the tree to the special remote as-is. + +Mixing key/value and exports in the same remote would be a mess and so is +not allowed. So, you have to configure a remote with `exporttree=true` +when initially setting it up with [[git-annex-initremote]](1). + +Repeated exports are done efficiently, by diffing the old and new tree, +and transferring only the changed files. + +Exports can be interrupted and resumed. However, partially uploaded files +will be re-started from the beginning. + +# SEE ALSO + +[[git-annex]](1) + +[[git-annex-export]](1) + +# AUTHOR + +Joey Hess + +Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-annex-import.mdwn b/doc/git-annex-import.mdwn index 22b3c3941c..3684505b62 100644 --- a/doc/git-annex-import.mdwn +++ b/doc/git-annex-import.mdwn @@ -96,6 +96,8 @@ instead of to the annex. [[git-annex-add]](1) +[[git-annex-export]](1) + # AUTHOR Joey Hess diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 14a787219b..544baafa1b 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -158,6 +158,12 @@ subdirectories). See [[git-annex-importfeed]](1) for details. +* `export treeish --to remote` + + Export content to a remote. + + See [[git-annex-export]](1) for details. + * `undo [filename|directory] ...` Undo last change to a file or directory. diff --git a/doc/special_remotes/directory.mdwn b/doc/special_remotes/directory.mdwn index 5584f31f39..e3f7f1e45f 100644 --- a/doc/special_remotes/directory.mdwn +++ b/doc/special_remotes/directory.mdwn @@ -31,6 +31,10 @@ remote: Do not use for new remotes. It is not safe to change the chunksize setting of an existing remote. +* `exporttree` - Set to "true" to make this special remote usable + by [[git-annex-export]]. It will not be usable as a general-purpose + special remote. + Setup example: # git annex initremote usbdrive type=directory directory=/media/usbdrive/ encryption=none From cca2764f91bfa42e4489631df5ff192b472a9100 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Aug 2017 13:51:00 -0400 Subject: [PATCH 03/31] provide file with content to export Rather than providing the key to export, provide the file. When exporting a treeish that contains files that are not annexed, this will let the content of those files also be exported. There's still a Key in the interface; it will be used by the external special remote protocol. A SHA1 key can be used when exporting non-annexed files. This commit was sponsored by Brock Spratlen on Patreon. --- Remote/Directory.hs | 12 +++++------- Types/Remote.hs | 12 ++++++------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index a371a19517..342b5bc578 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -233,15 +233,13 @@ checkPresentGeneric d ps = liftIO $ exportPath :: FilePath -> ExportLocation -> FilePath exportPath d (ExportLocation loc) = d loc -storeExportDirectory :: FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool -storeExportDirectory d k loc p = sendAnnex k rollback send +storeExportDirectory :: FilePath -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool +storeExportDirectory d src _k loc p = liftIO $ catchBoolIO $ do + createDirectoryIfMissing True dest + withMeteredFile src p (L.writeFile dest) + return True where dest = exportPath d loc - send src = liftIO $ catchBoolIO $ do - createDirectoryIfMissing True dest - withMeteredFile src p (L.writeFile dest) - return True - rollback = liftIO $ nukeFile dest retrieveExportDirectory :: FilePath -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex (Bool, Verification) retrieveExportDirectory d _k loc dest p = unVerified $ liftIO $ catchBoolIO $ do diff --git a/Types/Remote.hs b/Types/Remote.hs index d4b76f54f3..6a4d2039ea 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -97,20 +97,20 @@ data RemoteA a = Remote { -- operation. checkPresentCheap :: Bool, - -- Exports a key's contents to an ExportLocation. + -- Exports content to an ExportLocation. -- The exported file does not need to be updated atomically. - storeExport :: Maybe (Key -> ExportLocation -> MeterUpdate -> a Bool), - -- Retrieves an exported key to a file. + storeExport :: Maybe (FilePath -> Key -> ExportLocation -> MeterUpdate -> a Bool), + -- Retrieves exported content to a file. -- (The MeterUpdate does not need to be used if it writes -- sequentially to the file.) retrieveExport :: Maybe (Key -> ExportLocation -> FilePath -> MeterUpdate -> a (Bool, Verification)), - -- Removes an exported key (succeeds if the contents are not present) + -- Removes an exported file (succeeds if the contents are not present) removeExport :: Maybe (Key -> ExportLocation -> a Bool), - -- Checks if a key is exported to the remote at the specified + -- Checks if anything is exported to the remote at the specified -- ExportLocation. -- Throws an exception if the remote cannot be accessed. checkPresentExport :: Maybe (Key -> ExportLocation -> a Bool), - -- Renames an already exported key. + -- Renames an already exported file. renameExport :: Maybe (Key -> ExportLocation -> ExportLocation -> a Bool), -- Some remotes can provide additional details for whereis. From 9f3630f4e0a066aaf410fb1fe5eec1ca6ba2f80c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Aug 2017 14:58:38 -0400 Subject: [PATCH 04/31] initial export command Very basic operation works, but of course this is only the beginning. This commit was sponsored by Nick Daly on Patreon. --- CmdLine/GitAnnex.hs | 2 + CmdLine/Usage.hs | 2 + Command/Export.hs | 103 +++++++++++++++++++++++++++++++++++++++++++ Remote/Directory.hs | 2 +- doc/todo/export.mdwn | 13 ++++++ 5 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 Command/Export.hs diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index be5f56ba0a..1a5a13839b 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -95,6 +95,7 @@ import qualified Command.AddUrl import qualified Command.ImportFeed import qualified Command.RmUrl import qualified Command.Import +import qualified Command.Export import qualified Command.Map import qualified Command.Direct import qualified Command.Indirect @@ -141,6 +142,7 @@ cmds testoptparser testrunner = , Command.ImportFeed.cmd , Command.RmUrl.cmd , Command.Import.cmd + , Command.Export.cmd , Command.Init.cmd , Command.Describe.cmd , Command.InitRemote.cmd diff --git a/CmdLine/Usage.hs b/CmdLine/Usage.hs index c4522788fd..6dd2d053d9 100644 --- a/CmdLine/Usage.hs +++ b/CmdLine/Usage.hs @@ -94,6 +94,8 @@ paramAddress :: String paramAddress = "ADDRESS" paramItem :: String paramItem = "ITEM" +paramTreeish :: String +paramTreeish = "TREEISH" paramKeyValue :: String paramKeyValue = "K=V" paramNothing :: String diff --git a/Command/Export.hs b/Command/Export.hs new file mode 100644 index 0000000000..7de143ffb8 --- /dev/null +++ b/Command/Export.hs @@ -0,0 +1,103 @@ +{- git-annex command + - + - Copyright 2017 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Export where + +import Command +import qualified Git +import qualified Git.DiffTree +import Git.Sha +import Git.FilePath +import Types.Key +import Types.Remote +import Annex.Content +import Annex.CatFile +import Messages.Progress +import Utility.Tmp + +import qualified Data.ByteString.Lazy as L + +cmd :: Command +cmd = command "export" SectionCommon + "export content to a remote" + paramTreeish (seek <$$> optParser) + +data ExportOptions = ExportOptions + { exportTreeish :: Git.Ref + , exportRemote :: DeferredParse Remote + } + +optParser :: CmdParamsDesc -> Parser ExportOptions +optParser _ = ExportOptions + <$> (Git.Ref <$> parsetreeish) + <*> (parseRemoteOption <$> parseToOption) + where + parsetreeish = argument str + ( metavar paramTreeish + ) + +seek :: ExportOptions -> CommandSeek +seek o = do + r <- getParsed (exportRemote o) + let oldtreeish = emptyTree -- XXX temporary + (diff, cleanup) <- inRepo $ + Git.DiffTree.diffTreeRecursive oldtreeish (exportTreeish o) + seekActions $ pure $ map (start r) diff + void $ liftIO cleanup + +start :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart +start r diff + | Git.DiffTree.dstsha diff == nullSha = do + showStart "unexport" f + oldk <- either id id <$> exportKey (Git.DiffTree.srcsha diff) + next $ performUnexport r oldk loc + | otherwise = do + showStart "export" f + next $ performExport r diff loc + where + loc = ExportLocation $ toInternalGitPath $ + getTopFilePath $ Git.DiffTree.file diff + f = getTopFilePath $ Git.DiffTree.file diff + +performExport :: Remote -> Git.DiffTree.DiffTreeItem -> ExportLocation -> CommandPerform +performExport r diff loc = case storeExport r of + Nothing -> error "remote does not support exporting files" + Just storer -> next $ do + v <- exportKey (Git.DiffTree.dstsha diff) + case v of + Right k -> metered Nothing k $ \m -> + sendAnnex k + (void $ performUnexport r k loc) + (\f -> storer f k loc m) + -- Sending a non-annexed file. + Left sha1k -> metered Nothing sha1k $ \m -> + withTmpFile "export" $ \tmp h -> do + b <- catObject (Git.DiffTree.dstsha diff) + liftIO $ L.hPut h b + liftIO $ hClose h + storer tmp sha1k loc m + +performUnexport :: Remote -> Key -> ExportLocation -> CommandPerform +performUnexport r k loc = case removeExport r of + Nothing -> error "remote does not support removing exported files" + Just remover -> next $ remover k loc + +-- When the Sha points to an annexed file, get the key as Right. +-- When the Sha points to a non-annexed file, convert to a SHA1 key, +-- as Left. +exportKey :: Git.Sha -> Annex (Either Key Key) +exportKey sha = mk <$> catKey sha + where + mk (Just k) = Right k + mk Nothing = Left $ Key + { keyName = show sha + , keyVariety = SHA1Key (HasExt False) + , keySize = Nothing + , keyMtime = Nothing + , keyChunkSize = Nothing + , keyChunkNum = Nothing + } diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 342b5bc578..7f0f465128 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -235,7 +235,7 @@ exportPath d (ExportLocation loc) = d loc storeExportDirectory :: FilePath -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool storeExportDirectory d src _k loc p = liftIO $ catchBoolIO $ do - createDirectoryIfMissing True dest + createDirectoryIfMissing True (takeDirectory dest) withMeteredFile src p (L.writeFile dest) return True where diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index e729b0cf17..f589e953d9 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -14,3 +14,16 @@ Would this be able to reuse the existing `storeKey` interface, or would there need to be a new interface in supported remotes? --[[Joey]] + +Work is in progress. Todo list: + +* Remember the previously exported tree (in git-annex branch, see design) + and use to make next export more efficient. +* Only export to remotes that were initialized to support it. +* Prevent using export remotes for key/value storage. +* When exporting, update location tracking to allow getting from exports, +* Use retrieveExport when getting from export remotes. +* Efficient handling of renames. +* Detect export conflicts (see design) +* Support export to aditional special remotes (S3 etc) +* Support export to external special remotes. From efe3910c0498c888c2d5f7cc70f9fa999643bc6b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 31 Aug 2017 12:32:02 -0400 Subject: [PATCH 05/31] remove empty parent dirs when removing from export --- Remote/Directory.hs | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 7f0f465128..abbde1ceba 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -230,9 +230,6 @@ checkPresentGeneric d ps = liftIO $ ) ) -exportPath :: FilePath -> ExportLocation -> FilePath -exportPath d (ExportLocation loc) = d loc - storeExportDirectory :: FilePath -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool storeExportDirectory d src _k loc p = liftIO $ catchBoolIO $ do createDirectoryIfMissing True (takeDirectory dest) @@ -251,7 +248,7 @@ retrieveExportDirectory d _k loc dest p = unVerified $ liftIO $ catchBoolIO $ do removeExportDirectory :: FilePath -> Key -> ExportLocation -> Annex Bool removeExportDirectory d _k loc = liftIO $ do nukeFile src - void $ tryIO $ removeDirectory $ takeDirectory src + removeExportLocation d loc return True where src = exportPath d loc @@ -264,8 +261,21 @@ renameExportDirectory :: FilePath -> Key -> ExportLocation -> ExportLocation -> renameExportDirectory d _k oldloc newloc = liftIO $ catchBoolIO $ do createDirectoryIfMissing True dest renameFile src dest - void $ tryIO $ removeDirectory $ takeDirectory src + removeExportLocation d oldloc return True where src = exportPath d oldloc dest = exportPath d newloc + +exportPath :: FilePath -> ExportLocation -> FilePath +exportPath d (ExportLocation loc) = d loc + +{- Removes the ExportLocation directory and its parents, so long as + - they're empty, up to but not including the topdir. -} +removeExportLocation :: FilePath -> ExportLocation -> IO () +removeExportLocation topdir (ExportLocation loc) = go (Just loc) (Right ()) + where + go _ (Left _e) = return () + go Nothing _ = return () + go (Just loc') _ = go (upFrom loc') + =<< tryIO (removeDirectory $ exportPath topdir (ExportLocation loc')) From 4694e49158e41426c32e6b8f4ca2bee91de973c6 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 31 Aug 2017 12:37:25 -0400 Subject: [PATCH 06/31] fix error message when content to export is not locally available --- Command/Export.hs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/Command/Export.hs b/Command/Export.hs index 7de143ffb8..a2632857a3 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -16,6 +16,7 @@ import Types.Key import Types.Remote import Annex.Content import Annex.CatFile +import Logs.Location import Messages.Progress import Utility.Tmp @@ -69,10 +70,15 @@ performExport r diff loc = case storeExport r of Just storer -> next $ do v <- exportKey (Git.DiffTree.dstsha diff) case v of - Right k -> metered Nothing k $ \m -> - sendAnnex k - (void $ performUnexport r k loc) - (\f -> storer f k loc m) + Right k -> ifM (inAnnex k) + ( metered Nothing k $ \m -> + sendAnnex k + (void $ performUnexport r k loc) + (\f -> storer f k loc m) + , do + showNote "not available" + return False + ) -- Sending a non-annexed file. Left sha1k -> metered Nothing sha1k $ \m -> withTmpFile "export" $ \tmp h -> do From e662aceeac7a1c1dff9b2789c8d05a7aedc06503 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 31 Aug 2017 12:47:08 -0400 Subject: [PATCH 07/31] improve type --- CmdLine/Seek.hs | 4 ++-- Command/FindRef.hs | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 66cd985f41..556a108eb7 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -77,12 +77,12 @@ withFilesNotInGit skipdotfiles a params go l = seekActions $ prepFiltered a $ return $ concat $ segmentPaths params l -withFilesInRefs :: (FilePath -> Key -> CommandStart) -> CmdParams -> CommandSeek +withFilesInRefs :: (FilePath -> Key -> CommandStart) -> [Git.Ref] -> CommandSeek withFilesInRefs a = mapM_ go where go r = do matcher <- Limit.getMatcher - (l, cleanup) <- inRepo $ LsTree.lsTree (Git.Ref r) + (l, cleanup) <- inRepo $ LsTree.lsTree r forM_ l $ \i -> do let f = getTopFilePath $ LsTree.file i v <- catKey (LsTree.sha i) diff --git a/Command/FindRef.hs b/Command/FindRef.hs index cb14371e0c..93315bcef2 100644 --- a/Command/FindRef.hs +++ b/Command/FindRef.hs @@ -9,6 +9,7 @@ module Command.FindRef where import Command import qualified Command.Find as Find +import qualified Git cmd :: Command cmd = withGlobalOptions nonWorkTreeMatchingOptions $ Find.mkCommand $ @@ -17,4 +18,4 @@ cmd = withGlobalOptions nonWorkTreeMatchingOptions $ Find.mkCommand $ paramRef (seek <$$> Find.optParser) seek :: Find.FindOptions -> CommandSeek -seek o = Find.start o `withFilesInRefs` Find.findThese o +seek o = Find.start o `withFilesInRefs` (map Git.Ref $ Find.findThese o) From 7c7af825782b7c8706039b855c72709993542be4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 31 Aug 2017 13:29:54 -0400 Subject: [PATCH 08/31] resuming exports Make a pass over the whole exported tree, and upload anything that has not yet reached the export. Update location log when exporting. Note that the synthesized keys for non-annexed files are stored in the location log too. Some cases involving files in the tree with the same content are not handled correctly yet. This commit was sponsored by Boyd Stephen Smith Jr. on Patreon. --- Command/Export.hs | 153 +++++++++++------- .../exporting_trees_to_special_remotes.mdwn | 5 + doc/todo/export.mdwn | 10 ++ 3 files changed, 113 insertions(+), 55 deletions(-) diff --git a/Command/Export.hs b/Command/Export.hs index a2632857a3..aba8a18771 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -10,6 +10,8 @@ module Command.Export where import Command import qualified Git import qualified Git.DiffTree +import qualified Git.LsTree +import Git.Types import Git.Sha import Git.FilePath import Types.Key @@ -41,65 +43,19 @@ optParser _ = ExportOptions ( metavar paramTreeish ) -seek :: ExportOptions -> CommandSeek -seek o = do - r <- getParsed (exportRemote o) - let oldtreeish = emptyTree -- XXX temporary - (diff, cleanup) <- inRepo $ - Git.DiffTree.diffTreeRecursive oldtreeish (exportTreeish o) - seekActions $ pure $ map (start r) diff - void $ liftIO cleanup +-- An export includes both annexed files and files stored in git. +-- For the latter, a SHA1 key is synthesized. +data ExportKey = AnnexKey Key | GitKey Key -start :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart -start r diff - | Git.DiffTree.dstsha diff == nullSha = do - showStart "unexport" f - oldk <- either id id <$> exportKey (Git.DiffTree.srcsha diff) - next $ performUnexport r oldk loc - | otherwise = do - showStart "export" f - next $ performExport r diff loc - where - loc = ExportLocation $ toInternalGitPath $ - getTopFilePath $ Git.DiffTree.file diff - f = getTopFilePath $ Git.DiffTree.file diff +asKey :: ExportKey -> Key +asKey (AnnexKey k) = k +asKey (GitKey k) = k -performExport :: Remote -> Git.DiffTree.DiffTreeItem -> ExportLocation -> CommandPerform -performExport r diff loc = case storeExport r of - Nothing -> error "remote does not support exporting files" - Just storer -> next $ do - v <- exportKey (Git.DiffTree.dstsha diff) - case v of - Right k -> ifM (inAnnex k) - ( metered Nothing k $ \m -> - sendAnnex k - (void $ performUnexport r k loc) - (\f -> storer f k loc m) - , do - showNote "not available" - return False - ) - -- Sending a non-annexed file. - Left sha1k -> metered Nothing sha1k $ \m -> - withTmpFile "export" $ \tmp h -> do - b <- catObject (Git.DiffTree.dstsha diff) - liftIO $ L.hPut h b - liftIO $ hClose h - storer tmp sha1k loc m - -performUnexport :: Remote -> Key -> ExportLocation -> CommandPerform -performUnexport r k loc = case removeExport r of - Nothing -> error "remote does not support removing exported files" - Just remover -> next $ remover k loc - --- When the Sha points to an annexed file, get the key as Right. --- When the Sha points to a non-annexed file, convert to a SHA1 key, --- as Left. -exportKey :: Git.Sha -> Annex (Either Key Key) +exportKey :: Git.Sha -> Annex ExportKey exportKey sha = mk <$> catKey sha where - mk (Just k) = Right k - mk Nothing = Left $ Key + mk (Just k) = AnnexKey k + mk Nothing = GitKey $ Key { keyName = show sha , keyVariety = SHA1Key (HasExt False) , keySize = Nothing @@ -107,3 +63,90 @@ exportKey sha = mk <$> catKey sha , keyChunkSize = Nothing , keyChunkNum = Nothing } + +seek :: ExportOptions -> CommandSeek +seek o = do + r <- getParsed (exportRemote o) + let oldtreeish = emptyTree -- XXX temporary + + -- First, diff the old and new trees and update all changed + -- files in the export. + (diff, cleanup) <- inRepo $ + Git.DiffTree.diffTreeRecursive oldtreeish (exportTreeish o) + seekActions $ pure $ map (startDiff r) diff + void $ liftIO cleanup + + -- In case a previous export was incomplete, make a pass + -- over the whole tree and export anything that is not + -- yet exported. + (l, cleanup') <- inRepo $ Git.LsTree.lsTree (exportTreeish o) + seekActions $ pure $ map (start r) l + void $ liftIO cleanup' + +startDiff :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart +startDiff r diff + | Git.DiffTree.dstsha diff == nullSha = do + showStart "unexport" f + oldk <- exportKey (Git.DiffTree.srcsha diff) + next $ performUnexport r oldk loc + | otherwise = do + showStart "export" f + k <- exportKey (Git.DiffTree.dstsha diff) + next $ performExport r k (Git.DiffTree.dstsha diff) loc + where + loc = ExportLocation $ toInternalGitPath $ + getTopFilePath $ Git.DiffTree.file diff + f = getTopFilePath $ Git.DiffTree.file diff + +start :: Remote -> Git.LsTree.TreeItem -> CommandStart +start r ti = do + ek <- exportKey (Git.LsTree.sha ti) + stopUnless (elem (uuid r) <$> loggedLocations (asKey ek)) $ + next $ performExport r ek (Git.LsTree.sha ti) loc + where + loc = ExportLocation $ toInternalGitPath $ + getTopFilePath $ Git.LsTree.file ti + +performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform +performExport r ek contentsha loc = case storeExport r of + Nothing -> error "remote does not support exporting files" + Just storer -> do + sent <- case ek of + AnnexKey k -> ifM (inAnnex k) + ( metered Nothing k $ \m -> do + let rollback = void $ performUnexport r ek loc + sendAnnex k rollback + (\f -> storer f k loc m) + , do + showNote "not available" + return False + ) + -- Sending a non-annexed file. + GitKey sha1k -> metered Nothing sha1k $ \m -> + withTmpFile "export" $ \tmp h -> do + b <- catObject contentsha + liftIO $ L.hPut h b + liftIO $ hClose h + storer tmp sha1k loc m + if sent + then next $ cleanupExport r ek + else stop + +cleanupExport :: Remote -> ExportKey -> CommandCleanup +cleanupExport r ek = do + logChange (asKey ek) (uuid r) InfoPresent + return True + +performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform +performUnexport r ek loc = case removeExport r of + Nothing -> error "remote does not support removing exported files" + Just remover -> do + ok <- remover (asKey ek) loc + if ok + then next $ cleanupUnexport r ek + else stop + +cleanupUnexport :: Remote -> ExportKey -> CommandCleanup +cleanupUnexport r ek = do + logChange (asKey ek) (uuid r) InfoMissing + return True diff --git a/doc/design/exporting_trees_to_special_remotes.mdwn b/doc/design/exporting_trees_to_special_remotes.mdwn index ce7431141f..c9b2b72e59 100644 --- a/doc/design/exporting_trees_to_special_remotes.mdwn +++ b/doc/design/exporting_trees_to_special_remotes.mdwn @@ -175,6 +175,11 @@ except for when the WORM or URL backend is used. So, prevent the user from exporting such keys. Also, force verification on for such special remotes, don't let it be turned off. +The same file contents may be in a treeish multiple times under different +filenames. That complicates using location tracking. One file may have been +exported and the other not, and location tracking says that the content +is present in the export. + ## recording exported filenames in git-annex branch In order to download the content of a key from a file exported diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index f589e953d9..354dc84e7b 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -27,3 +27,13 @@ Work is in progress. Todo list: * Detect export conflicts (see design) * Support export to aditional special remotes (S3 etc) * Support export to external special remotes. +* If the same content is present in two different files, export + location tracking can be messed up. + + When one of the files is deleted and + that tree is exported, the location log for the key will be updated + to say it's not present, even though the other file is still present. + + And, once one of the files is uploaded, the location log will + say the content is present, so the pass over the tree won't try to + upload the other file. From bb08b1abd207aeecccbc7060e523b011d80cb35b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 31 Aug 2017 14:24:32 -0400 Subject: [PATCH 09/31] make storeExport atomic This avoids needing to deal with the complexity of partially transferred files in the export. We'd not be able to resume uploading to such a file anyway, so just avoid them. The implementation in Remote.Directory is not completely ideal, because it could leave the temp file hanging around in the export directory. This only happens if it's killed with -9, or there's a power failure; normally viaTmp cleans up after itself, even when interrupted. I could not see a better way to do it though, since the export directory might be the root of a filesystem. Also some design thoughts on resuming, which depend on storeExport being atomic. This commit was sponsored by Fernando Jimenez on Partreon. --- Remote/Directory.hs | 12 ++- Types/Remote.hs | 3 +- Utility/Tmp.hs | 2 +- .../exporting_trees_to_special_remotes.mdwn | 74 +++++++++++-------- 4 files changed, 50 insertions(+), 41 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index abbde1ceba..f5d7f7e49c 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -29,6 +29,7 @@ import qualified Remote.Directory.LegacyChunked as Legacy import Annex.Content import Annex.UUID import Utility.Metered +import Utility.Tmp remote :: RemoteType remote = RemoteType { @@ -116,11 +117,6 @@ getLocation d k = do storeDir :: FilePath -> Key -> FilePath storeDir d k = addTrailingPathSeparator $ d hashDirLower def k keyFile k -{- Where we store temporary data for a key, in the directory, as it's being - - written. -} -tmpDir :: FilePath -> Key -> FilePath -tmpDir d k = addTrailingPathSeparator $ d "tmp" keyFile k - {- Check if there is enough free disk space in the remote's directory to - store the key. Note that the unencrypted key size is checked. -} prepareStore :: FilePath -> ChunkConfig -> Preparer Storer @@ -148,7 +144,7 @@ store d chunkconfig k b p = liftIO $ do finalizeStoreGeneric tmpdir destdir return True where - tmpdir = tmpDir d k + tmpdir = addTrailingPathSeparator $ d "tmp" keyFile k destdir = storeDir d k {- Passed a temp directory that contains the files that should be placed @@ -233,7 +229,9 @@ checkPresentGeneric d ps = liftIO $ storeExportDirectory :: FilePath -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool storeExportDirectory d src _k loc p = liftIO $ catchBoolIO $ do createDirectoryIfMissing True (takeDirectory dest) - withMeteredFile src p (L.writeFile dest) + -- Write via temp file so that checkPresentGeneric will not + -- see it until it's fully stored. + viaTmp (\tmp () -> withMeteredFile src p (L.writeFile tmp)) dest () return True where dest = exportPath d loc diff --git a/Types/Remote.hs b/Types/Remote.hs index 6a4d2039ea..6e78bf2386 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -98,7 +98,8 @@ data RemoteA a = Remote { checkPresentCheap :: Bool, -- Exports content to an ExportLocation. - -- The exported file does not need to be updated atomically. + -- The exported file should not appear to be present on the remote + -- until all of its contents have been transferred. storeExport :: Maybe (FilePath -> Key -> ExportLocation -> MeterUpdate -> a Bool), -- Retrieves exported content to a file. -- (The MeterUpdate does not need to be used if it writes diff --git a/Utility/Tmp.hs b/Utility/Tmp.hs index 6a541cfe40..ca611e0b4c 100644 --- a/Utility/Tmp.hs +++ b/Utility/Tmp.hs @@ -28,7 +28,7 @@ type Template = String {- Runs an action like writeFile, writing to a temp file first and - then moving it into place. The temp file is stored in the same - directory as the final file to avoid cross-device renames. -} -viaTmp :: (MonadMask m, MonadIO m) => (FilePath -> String -> m ()) -> FilePath -> String -> m () +viaTmp :: (MonadMask m, MonadIO m) => (FilePath -> v -> m ()) -> FilePath -> v -> m () viaTmp a file content = bracketIO setup cleanup use where (dir, base) = splitFileName file diff --git a/doc/design/exporting_trees_to_special_remotes.mdwn b/doc/design/exporting_trees_to_special_remotes.mdwn index c9b2b72e59..835a866404 100644 --- a/doc/design/exporting_trees_to_special_remotes.mdwn +++ b/doc/design/exporting_trees_to_special_remotes.mdwn @@ -69,11 +69,6 @@ To efficiently update an export, git-annex can diff the tree that was exported with the new tree. The naive approach is to upload new and modified files and remove deleted files. -Note that a file may have been partially uploaded to an export, and then -the export updated to a tree without that file. So, need to try to delete -all removed files, even if location tracking does not say that the special -remote contains them. - With rename detection, if the special remote supports moving files, more efficient updates can be done. It gets complicated; consider two files that swap names. @@ -81,33 +76,6 @@ that swap names. If the special remote supports copying files, that would also make some updates more efficient. -## resuming exports - -Resuming an interrupted export needs to work well. - -There are two cases here: - -1. Some of the files in the tree have been uploaded; others have not. -2. A file has been partially uploaded. - -These two cases need to be disentangled somehow in order to handle -them. One way is to use the location log as follows: - -* Before a file is uploaded, look up what key is currently exported - using that filename. If there is one, update the location log, - saying it's not present in the special remote. -* Upload the file. -* Update the location log for the newly exported key. - -Note that this method does not allow resuming a partial upload by appending to -a file, because we don't know if the file actually started to be uploaded, or -if the file instead still has the old key's content. Instead, the whole -file needs to be re-uploaded. - -Alternative: Keep an index file that's the current state of the export. -See comment #4 of [[todo/export]]. Not sure if that works? Perhaps it -would be overkill if it's only used to support resuming partial uploads. - ## changes to special remote interface This needs some additional methods added to special remotes, and to @@ -123,6 +91,9 @@ Here's the changes to the latter: * `TRANSFEREXPORT STORE|RETRIEVE Key File` Requests the transfer of a File on local disk to or from the previously provided Name on the special remote. + Note that it's important that, while a file is being stored, + CHECKPRESENTEXPORT not indicate it's present until all the data has + been transferred. The remote responds with either `TRANSFER-SUCCESS` or `TRANSFER-FAILURE`, and a remote where exports do not make sense may always fail. @@ -241,3 +212,42 @@ re-uploads, but it's reasonably efficient. The documentation should suggest strongly only exporting to a given special remote from a single repository, or having some other rule that avoids export conflicts. + +## when to update export.log for efficient resuming of exports + +When should `export.log` be updated? Possibilities: + +* Before performing any work, to set the goal. +* After the export is fully successful, to record the current state. +* After some mid-point. + +Lots of things could go wrong during an export. A file might fail to be +transferred or only part of it be transferred; a file's content might not +be present to transfer at all. The export could be interrupted part way. +Updating the export.log at the right point in time is important to handle +these cases efficiently. + +If the export.log is updated first, then it's only a goal and does not tell +us what's been done already. + +If the export.log is updated only after complete success, then the common +case of some files not having content locally present will prevent it from +being updated. When we resume, we again don't know what's been done +already. + +If the export.log is updated after deleting any files from the +remote that are not the same in the new treeish as in the old treeish, +and as long as TRANSFEREXPORT STORE is atomic, then when resuming we can +trust CHECKPRESENTEXPORT to only find files that have the correct content +for the current treeish. (Unless a conflicting export was made from +elsewhere, but in that case, the conflict resolution will have to fix up +later.) + +Efficient resuming can then first check if the location log says the +export contains the content. (If not, transfer a copy.) If the location +log says the export contains the content, use CHECKPRESENTEXPORT to see if +the file exists, and if not transfer a copy. The CHECKPRESENTEXPORT check +deals with the case where the treeish has two files with the same content. +If we have a key-to-files map for the export, then we can skip the +CHECKPRESENTEXPORT check when there's only one file using a key. So, +resuming can be quite efficient. From 978885247e86a914ee2fc081e5ebd65e4513f945 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 31 Aug 2017 15:41:48 -0400 Subject: [PATCH 10/31] implement export.log and resolve export conflicts Incremental export updates work now too. This commit was sponsored by Anthony DeRobertis on Patreon. --- Command/Export.hs | 80 ++++++++++++++++++++++++++------------------ Logs.hs | 4 +++ Logs/Export.hs | 67 +++++++++++++++++++++++++++++++++++++ doc/internals.mdwn | 8 +++-- doc/todo/export.mdwn | 6 +--- git-annex.cabal | 1 + 6 files changed, 126 insertions(+), 40 deletions(-) create mode 100644 Logs/Export.hs diff --git a/Command/Export.hs b/Command/Export.hs index aba8a18771..1310244aca 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -11,14 +11,16 @@ import Command import qualified Git import qualified Git.DiffTree import qualified Git.LsTree +import qualified Git.Ref import Git.Types -import Git.Sha import Git.FilePath +import Git.Sha import Types.Key import Types.Remote import Annex.Content import Annex.CatFile import Logs.Location +import Logs.Export import Messages.Progress import Utility.Tmp @@ -67,45 +69,46 @@ exportKey sha = mk <$> catKey sha seek :: ExportOptions -> CommandSeek seek o = do r <- getParsed (exportRemote o) - let oldtreeish = emptyTree -- XXX temporary + new <- fromMaybe (error "unknown tree") <$> + inRepo (Git.Ref.sha (exportTreeish o)) + old <- getExport (uuid r) - -- First, diff the old and new trees and update all changed - -- files in the export. - (diff, cleanup) <- inRepo $ - Git.DiffTree.diffTreeRecursive oldtreeish (exportTreeish o) - seekActions $ pure $ map (startDiff r) diff - void $ liftIO cleanup + when (length old > 1) $ + warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.." + + -- First, diff the old and new trees and delete all changed + -- files in the export. Every file that remains in the export will + -- have the content from the new treeish. + -- + -- (Also, when there was an export conflict, this resolves it.) + forM_ old $ \oldtreesha -> do + (diff, cleanup) <- inRepo $ + Git.DiffTree.diffTreeRecursive oldtreesha new + seekActions $ pure $ map (startUnexport r) diff + void $ liftIO cleanup - -- In case a previous export was incomplete, make a pass - -- over the whole tree and export anything that is not - -- yet exported. - (l, cleanup') <- inRepo $ Git.LsTree.lsTree (exportTreeish o) - seekActions $ pure $ map (start r) l + -- Waiting until now to record the export guarantees that, + -- if this export is interrupted, there are no files left over + -- from a previous export, that are not part of this export. + recordExport (uuid r) $ ExportChange + { oldTreeish = old + , newTreeish = new + } + + -- Export everything that is not yet exported. + (l, cleanup') <- inRepo $ Git.LsTree.lsTree new + seekActions $ pure $ map (startExport r) l void $ liftIO cleanup' -startDiff :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart -startDiff r diff - | Git.DiffTree.dstsha diff == nullSha = do - showStart "unexport" f - oldk <- exportKey (Git.DiffTree.srcsha diff) - next $ performUnexport r oldk loc - | otherwise = do - showStart "export" f - k <- exportKey (Git.DiffTree.dstsha diff) - next $ performExport r k (Git.DiffTree.dstsha diff) loc - where - loc = ExportLocation $ toInternalGitPath $ - getTopFilePath $ Git.DiffTree.file diff - f = getTopFilePath $ Git.DiffTree.file diff - -start :: Remote -> Git.LsTree.TreeItem -> CommandStart -start r ti = do +startExport :: Remote -> Git.LsTree.TreeItem -> CommandStart +startExport r ti = do ek <- exportKey (Git.LsTree.sha ti) - stopUnless (elem (uuid r) <$> loggedLocations (asKey ek)) $ + stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do + showStart "export" f next $ performExport r ek (Git.LsTree.sha ti) loc where - loc = ExportLocation $ toInternalGitPath $ - getTopFilePath $ Git.LsTree.file ti + loc = ExportLocation $ toInternalGitPath f + f = getTopFilePath $ Git.LsTree.file ti performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform performExport r ek contentsha loc = case storeExport r of @@ -137,6 +140,17 @@ cleanupExport r ek = do logChange (asKey ek) (uuid r) InfoPresent return True +startUnexport :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart +startUnexport r diff + | Git.DiffTree.srcsha diff /= nullSha = do + showStart "unexport" f + oldk <- exportKey (Git.DiffTree.srcsha diff) + next $ performUnexport r oldk loc + | otherwise = stop + where + loc = ExportLocation $ toInternalGitPath f + f = getTopFilePath $ Git.DiffTree.file diff + performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform performUnexport r ek loc = case removeExport r of Nothing -> error "remote does not support removing exported files" diff --git a/Logs.hs b/Logs.hs index 716520af44..7b6c7dd20d 100644 --- a/Logs.hs +++ b/Logs.hs @@ -42,6 +42,7 @@ topLevelUUIDBasedLogs = , activityLog , differenceLog , multicastLog + , exportLog ] {- All the ways to get a key from a presence log file -} @@ -97,6 +98,9 @@ differenceLog = "difference.log" multicastLog :: FilePath multicastLog = "multicast.log" +exportLog :: FilePath +exportLog = "export.log" + {- The pathname of the location log file for a given key. -} locationLogFile :: GitConfig -> Key -> String locationLogFile config key = branchHashDir config key keyFile key ++ ".log" diff --git a/Logs/Export.hs b/Logs/Export.hs new file mode 100644 index 0000000000..a0019a06c7 --- /dev/null +++ b/Logs/Export.hs @@ -0,0 +1,67 @@ +{- git-annex export log + - + - Copyright 2017 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.Export where + +import qualified Data.Map as M + +import Annex.Common +import qualified Annex.Branch +import qualified Git +import Logs +import Logs.UUIDBased +import Annex.UUID + +-- | Get the treeish that was exported to a special remote. +-- +-- If the list contains multiple items, there was an export conflict, +-- and different trees were exported to the same special remote. +getExport :: UUID -> Annex [Git.Ref] +getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap + . parseLogNew parseExportLog + <$> Annex.Branch.get exportLog + where + get (ExportLog t u) + | u == remoteuuid = Just t + | otherwise = Nothing + +data ExportChange = ExportChange + { oldTreeish :: [Git.Ref] + , newTreeish :: Git.Ref + } + +-- | Record a change in what's exported to a special remote. +-- +-- Any entries in the log for the oldTreeish will be updated to the +-- newTreeish. This way, when multiple repositories are exporting to +-- the same special remote, there's no conflict as long as they move +-- forward in lock-step. +recordExport :: UUID -> ExportChange -> Annex () +recordExport remoteuuid ec = do + c <- liftIO currentVectorClock + u <- getUUID + let val = ExportLog (newTreeish ec) remoteuuid + Annex.Branch.change exportLog $ + showLogNew formatExportLog + . changeLog c u val + . M.mapWithKey (updateothers c u) + . parseLogNew parseExportLog + where + updateothers c u theiru le@(LogEntry _ (ExportLog t remoteuuid')) + | u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le + | otherwise = LogEntry c (ExportLog (newTreeish ec) theiru) + +data ExportLog = ExportLog Git.Ref UUID + +formatExportLog :: ExportLog -> String +formatExportLog (ExportLog treeish remoteuuid) = + Git.fromRef treeish ++ " " ++ fromUUID remoteuuid + +parseExportLog :: String -> Maybe ExportLog +parseExportLog s = case words s of + (t:u:[]) -> Just $ ExportLog (Git.Ref t) (toUUID u) + _ -> Nothing diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 7d39b10681..4b24ce443a 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -185,8 +185,12 @@ content expression. Tracks what trees have been exported to special remotes by [[git-annex-export]](1). -Each line starts with a timestamp, then the uuid of the special remote, -followed by the sha1 of the tree that was exported to that special remote. +Each line starts with a timestamp, then the uuid of the repository +that exported to the special remote, followed by the sha1 of the tree +that was exported, and then by the uuid of the special remote. For example: + + 1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55 + 1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55 (The exported tree is also grafted into the git-annex branch, at `export.tree`, to prevent git from garbage collecting it. However, the head diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index 354dc84e7b..914febe34c 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -17,14 +17,10 @@ there need to be a new interface in supported remotes? Work is in progress. Todo list: -* Remember the previously exported tree (in git-annex branch, see design) - and use to make next export more efficient. * Only export to remotes that were initialized to support it. * Prevent using export remotes for key/value storage. -* When exporting, update location tracking to allow getting from exports, * Use retrieveExport when getting from export remotes. * Efficient handling of renames. -* Detect export conflicts (see design) * Support export to aditional special remotes (S3 etc) * Support export to external special remotes. * If the same content is present in two different files, export @@ -36,4 +32,4 @@ Work is in progress. Todo list: And, once one of the files is uploaded, the location log will say the content is present, so the pass over the tree won't try to - upload the other file. + upload the other file. (See design for a fix for this.) diff --git a/git-annex.cabal b/git-annex.cabal index 16b6bda27f..a7d0628576 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -849,6 +849,7 @@ Executable git-annex Logs.Config Logs.Difference Logs.Difference.Pure + Logs.Export Logs.FsckResults Logs.Group Logs.Line From 5483ea90eca33f61c799fb6a3c2675657caa9c75 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 31 Aug 2017 18:06:49 -0400 Subject: [PATCH 11/31] graft exported tree into git-annex branch So it will be available later and elsewhere, even after GC. I first though to use git update-index to do this, but feeding it a line with a tree object seems to always cause it to generate a git subtree merge. So, fell back to using the Git.Tree interface to maniupulate the trees, and not involving the git-annex branch index file at all. This commit was sponsored by Andreas Karlsson. --- Annex/Branch.hs | 1 + Command/Export.hs | 4 +++- Git/Tree.hs | 6 ++++- Logs/Export.hs | 24 +++++++++++++++++++ .../exporting_trees_to_special_remotes.mdwn | 15 ++---------- 5 files changed, 35 insertions(+), 15 deletions(-) diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 5482dc44ba..5214df627d 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -21,6 +21,7 @@ module Annex.Branch ( maybeChange, commit, forceCommit, + getBranch, files, withIndex, performTransitions, diff --git a/Command/Export.hs b/Command/Export.hs index 1310244aca..0df13e489e 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -70,7 +70,9 @@ seek :: ExportOptions -> CommandSeek seek o = do r <- getParsed (exportRemote o) new <- fromMaybe (error "unknown tree") <$> - inRepo (Git.Ref.sha (exportTreeish o)) + -- Dereference the tree pointed to by the branch, commit, + -- or tag. + inRepo (Git.Ref.tree (exportTreeish o)) old <- getExport (uuid r) when (length old > 1) $ diff --git a/Git/Tree.hs b/Git/Tree.hs index 3e6b85a1d4..9e9b17af2b 100644 --- a/Git/Tree.hs +++ b/Git/Tree.hs @@ -14,6 +14,7 @@ module Git.Tree ( recordTree, TreeItem(..), adjustTree, + treeMode, ) where import Common @@ -94,12 +95,15 @@ mkTree (MkTreeHandle cp) l = CoProcess.query cp send receive send h = do forM_ l $ \i -> hPutStr h $ case i of TreeBlob f fm s -> mkTreeOutput fm BlobObject s f - RecordedSubTree f s _ -> mkTreeOutput 0o040000 TreeObject s f + RecordedSubTree f s _ -> mkTreeOutput treeMode TreeObject s f NewSubTree _ _ -> error "recordSubTree internal error; unexpected NewSubTree" TreeCommit f fm s -> mkTreeOutput fm CommitObject s f hPutStr h "\NUL" -- signal end of tree to --batch receive h = getSha "mktree" (hGetLine h) +treeMode :: FileMode +treeMode = 0o040000 + mkTreeOutput :: FileMode -> ObjectType -> Sha -> TopFilePath -> String mkTreeOutput fm ot s f = concat [ showOct fm "" diff --git a/Logs/Export.hs b/Logs/Export.hs index a0019a06c7..1fd1460fcd 100644 --- a/Logs/Export.hs +++ b/Logs/Export.hs @@ -12,6 +12,9 @@ import qualified Data.Map as M import Annex.Common import qualified Annex.Branch import qualified Git +import qualified Git.Branch +import Git.Tree +import Git.FilePath import Logs import Logs.UUIDBased import Annex.UUID @@ -40,6 +43,9 @@ data ExportChange = ExportChange -- newTreeish. This way, when multiple repositories are exporting to -- the same special remote, there's no conflict as long as they move -- forward in lock-step. +-- +-- Also, the newTreeish is grafted into the git-annex branch. This is done +-- to ensure that it's available later. recordExport :: UUID -> ExportChange -> Annex () recordExport remoteuuid ec = do c <- liftIO currentVectorClock @@ -50,6 +56,7 @@ recordExport remoteuuid ec = do . changeLog c u val . M.mapWithKey (updateothers c u) . parseLogNew parseExportLog + graftTreeish (newTreeish ec) where updateothers c u theiru le@(LogEntry _ (ExportLog t remoteuuid')) | u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le @@ -65,3 +72,20 @@ parseExportLog :: String -> Maybe ExportLog parseExportLog s = case words s of (t:u:[]) -> Just $ ExportLog (Git.Ref t) (toUUID u) _ -> Nothing + +-- To prevent git-annex branch merge conflicts, the treeish is +-- first grafted in and then removed in a subsequent commit. +graftTreeish :: Git.Ref -> Annex () +graftTreeish treeish = do + branchref <- Annex.Branch.getBranch + Tree t <- inRepo $ getTree branchref + t' <- inRepo $ recordTree $ Tree $ + RecordedSubTree (asTopFilePath graftpoint) treeish [] : t + commit <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit + "export tree" [branchref] t' + origtree <- inRepo $ recordTree (Tree t) + commit' <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit + "export tree cleanup" [commit] origtree + inRepo $ Git.Branch.update' Annex.Branch.fullname commit' + where + graftpoint = "export.tree" diff --git a/doc/design/exporting_trees_to_special_remotes.mdwn b/doc/design/exporting_trees_to_special_remotes.mdwn index 835a866404..7e5700fae0 100644 --- a/doc/design/exporting_trees_to_special_remotes.mdwn +++ b/doc/design/exporting_trees_to_special_remotes.mdwn @@ -133,12 +133,6 @@ key/value stores. The content of a file can change, and if multiple repositories can export a special remote, they can be out of sync about what files are exported to it. -To avoid such problems, when updating an exported file on a special remote, -the key could be recorded there too. But, this would have to be done -atomically, and checked atomically when downloading the file. Special -remotes lack atomicity guarantees for file storage, let alone for file -retrieval. - Possible solution: Make exporttree=true cause the special remote to be untrusted, and rely on annex.verify to catch cases where the content of a file on a special remote has changed. This would work well enough @@ -205,13 +199,8 @@ In this case, git-annex knows both exported trees. Have the user provide a tree that resolves the conflict as they desire (it could be the same as one of the exported trees, or some merge of them or an entirely new tree). The UI to do this can just be another `git annex export $tree --to remote`. -To resolve, diff each exported tree in turn against the resolving tree. If a -file differs, re-export that file. In some cases this will do unncessary -re-uploads, but it's reasonably efficient. - -The documentation should suggest strongly only exporting to a given special -remote from a single repository, or having some other rule that avoids -export conflicts. +To resolve, diff each exported tree in turn against the resolving tree +and delete all files that differ. ## when to update export.log for efficient resuming of exports From a4328b49d2d0e07815db4d92b1176fe4bed16cf5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 1 Sep 2017 13:02:07 -0400 Subject: [PATCH 12/31] refactor ExportActions This will allow disabling exports for remotes that are not configured to allow them. Also, exportSupported will be useful for the external special remote to probe. This commit was supported by the NSF-funded DataLad project --- Command/Export.hs | 61 ++++++------ Remote/BitTorrent.hs | 7 +- Remote/Bup.hs | 7 +- Remote/Ddar.hs | 7 +- Remote/Directory.hs | 14 ++- Remote/External.hs | 7 +- Remote/GCrypt.hs | 7 +- Remote/Git.hs | 7 +- Remote/Glacier.hs | 7 +- Remote/Helper/Export.hs | 21 +++++ Remote/Hook.hs | 7 +- Remote/P2P.hs | 7 +- Remote/Rsync.hs | 7 +- Remote/S3.hs | 7 +- Remote/Tahoe.hs | 7 +- Remote/Web.hs | 7 +- Remote/WebDAV.hs | 7 +- Types/Remote.hs | 93 ++++++++++--------- .../exporting_trees_to_special_remotes.mdwn | 4 + git-annex.cabal | 1 + 20 files changed, 143 insertions(+), 149 deletions(-) create mode 100644 Remote/Helper/Export.hs diff --git a/Command/Export.hs b/Command/Export.hs index 0df13e489e..03d549cbf8 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -69,6 +69,9 @@ exportKey sha = mk <$> catKey sha seek :: ExportOptions -> CommandSeek seek o = do r <- getParsed (exportRemote o) + unlessM (exportSupported (exportActions r)) $ + error "That remote does not support exports." + new <- fromMaybe (error "unknown tree") <$> -- Dereference the tree pointed to by the branch, commit, -- or tag. @@ -113,29 +116,28 @@ startExport r ti = do f = getTopFilePath $ Git.LsTree.file ti performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform -performExport r ek contentsha loc = case storeExport r of - Nothing -> error "remote does not support exporting files" - Just storer -> do - sent <- case ek of - AnnexKey k -> ifM (inAnnex k) - ( metered Nothing k $ \m -> do - let rollback = void $ performUnexport r ek loc - sendAnnex k rollback - (\f -> storer f k loc m) - , do - showNote "not available" - return False - ) - -- Sending a non-annexed file. - GitKey sha1k -> metered Nothing sha1k $ \m -> - withTmpFile "export" $ \tmp h -> do - b <- catObject contentsha - liftIO $ L.hPut h b - liftIO $ hClose h - storer tmp sha1k loc m - if sent - then next $ cleanupExport r ek - else stop +performExport r ek contentsha loc = do + let storer = storeExport $ exportActions r + sent <- case ek of + AnnexKey k -> ifM (inAnnex k) + ( metered Nothing k $ \m -> do + let rollback = void $ performUnexport r ek loc + sendAnnex k rollback + (\f -> storer f k loc m) + , do + showNote "not available" + return False + ) + -- Sending a non-annexed file. + GitKey sha1k -> metered Nothing sha1k $ \m -> + withTmpFile "export" $ \tmp h -> do + b <- catObject contentsha + liftIO $ L.hPut h b + liftIO $ hClose h + storer tmp sha1k loc m + if sent + then next $ cleanupExport r ek + else stop cleanupExport :: Remote -> ExportKey -> CommandCleanup cleanupExport r ek = do @@ -154,13 +156,12 @@ startUnexport r diff f = getTopFilePath $ Git.DiffTree.file diff performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform -performUnexport r ek loc = case removeExport r of - Nothing -> error "remote does not support removing exported files" - Just remover -> do - ok <- remover (asKey ek) loc - if ok - then next $ cleanupUnexport r ek - else stop +performUnexport r ek loc = do + let remover = removeExport $ exportActions r + ok <- remover (asKey ek) loc + if ok + then next $ cleanupUnexport r ek + else stop cleanupUnexport :: Remote -> ExportKey -> CommandCleanup cleanupUnexport r ek = do diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs index 887a0898e6..9a1be1c0ea 100644 --- a/Remote/BitTorrent.hs +++ b/Remote/BitTorrent.hs @@ -26,6 +26,7 @@ import Backend.URL import Annex.Perms import Annex.UUID import qualified Annex.Url as Url +import Remote.Helper.Export import Network.URI @@ -61,11 +62,7 @@ gen r _ c gc = , lockContent = Nothing , checkPresent = checkKey , checkPresentCheap = False - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Bup.hs b/Remote/Bup.hs index aad8e6bbad..6ff2aa885a 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -25,6 +25,7 @@ import Config.Cost import qualified Remote.Helper.Ssh as Ssh import Remote.Helper.Special import Remote.Helper.Messages +import Remote.Helper.Export import Utility.Hash import Utility.UserInfo import Annex.UUID @@ -61,11 +62,7 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = bupLocal buprepo - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index 1da3ff4121..c5d02a4e6a 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -19,6 +19,7 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special +import Remote.Helper.Export import Annex.Ssh import Annex.UUID import Utility.SshHost @@ -60,11 +61,7 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = ddarLocal ddarrepo - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Directory.hs b/Remote/Directory.hs index f5d7f7e49c..e2e517b842 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -25,6 +25,7 @@ import Config.Cost import Config import Utility.FileMode import Remote.Helper.Special +import Remote.Helper.Export import qualified Remote.Directory.LegacyChunked as Legacy import Annex.Content import Annex.UUID @@ -59,11 +60,14 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = True - , storeExport = Just $ storeExportDirectory dir - , retrieveExport = Just $ retrieveExportDirectory dir - , removeExport = Just $ removeExportDirectory dir - , checkPresentExport = Just $ checkPresentExportDirectory dir - , renameExport = Just $ renameExportDirectory dir + , exportActions = ExportActions + { exportSupported = return True + , storeExport = storeExportDirectory dir + , retrieveExport = retrieveExportDirectory dir + , removeExport = removeExportDirectory dir + , checkPresentExport = checkPresentExportDirectory dir + , renameExport = renameExportDirectory dir + } , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/External.hs b/Remote/External.hs index dd62c1539e..fca60a995f 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -18,6 +18,7 @@ import Config import Git.Config (isTrue, boolConfig) import Git.Env import Remote.Helper.Special +import Remote.Helper.Export import Remote.Helper.ReadOnly import Remote.Helper.Messages import Utility.Metered @@ -85,11 +86,7 @@ gen r u c gc , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = towhereis , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 95b7ae2875..dd681a75c7 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -38,6 +38,7 @@ import Remote.Helper.Git import Remote.Helper.Encryptable import Remote.Helper.Special import Remote.Helper.Messages +import Remote.Helper.Export import qualified Remote.Helper.Ssh as Ssh import Utility.Metered import Annex.UUID @@ -114,11 +115,7 @@ gen' r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = repoCheap r - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Git.hs b/Remote/Git.hs index 020cd1c619..129d5e1716 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -50,6 +50,7 @@ import Utility.Batch import Utility.SimpleProtocol import Remote.Helper.Git import Remote.Helper.Messages +import Remote.Helper.Export import qualified Remote.Helper.Ssh as Ssh import qualified Remote.GCrypt import qualified Remote.P2P @@ -157,11 +158,7 @@ gen r u c gc , lockContent = Just (lockKey new) , checkPresent = inAnnex new , checkPresentCheap = repoCheap r - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Nothing , remoteFsck = if Git.repoIsUrl r then Nothing diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index be65cecb76..b21167aaf3 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -18,6 +18,7 @@ import Config import Config.Cost import Remote.Helper.Special import Remote.Helper.Messages +import Remote.Helper.Export import qualified Remote.Helper.AWS as AWS import Creds import Utility.Metered @@ -57,11 +58,7 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Helper/Export.hs b/Remote/Helper/Export.hs new file mode 100644 index 0000000000..d623818e73 --- /dev/null +++ b/Remote/Helper/Export.hs @@ -0,0 +1,21 @@ +{- exports to remotes + - + - Copyright 2017 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Remote.Helper.Export where + +import Annex.Common +import Types.Remote + +exportUnsupported :: ExportActions Annex +exportUnsupported = ExportActions + { exportSupported = return False + , storeExport = \_ _ _ _ -> return False + , retrieveExport = \_ _ _ _ -> return (False, UnVerified) + , removeExport = \_ _ -> return False + , checkPresentExport = \_ _ -> return False + , renameExport = \_ _ _ -> return False + } diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 2a98742426..5be4339e33 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -16,6 +16,7 @@ import Config.Cost import Annex.UUID import Remote.Helper.Special import Remote.Helper.Messages +import Remote.Helper.Export import Utility.Env import Messages.Progress @@ -51,11 +52,7 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/P2P.hs b/Remote/P2P.hs index d77ac89d8f..f51b73b33e 100644 --- a/Remote/P2P.hs +++ b/Remote/P2P.hs @@ -24,6 +24,7 @@ import Annex.UUID import Config import Config.Cost import Remote.Helper.Git +import Remote.Helper.Export import Messages.Progress import Utility.Metered import Utility.AuthToken @@ -57,11 +58,7 @@ chainGen addr r u c gc = do , lockContent = Just (lock u addr connpool) , checkPresent = checkpresent u addr connpool , checkPresentCheap = False - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index d40d23baef..33485c78b8 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -28,6 +28,7 @@ import Annex.UUID import Annex.Ssh import Remote.Helper.Special import Remote.Helper.Messages +import Remote.Helper.Export import Remote.Rsync.RsyncUrl import Crypto import Utility.Rsync @@ -73,11 +74,7 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/S3.hs b/Remote/S3.hs index ce67765955..341d14b4e7 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -39,6 +39,7 @@ import Config.Cost import Remote.Helper.Special import Remote.Helper.Http import Remote.Helper.Messages +import Remote.Helper.Export import qualified Remote.Helper.AWS as AWS import Creds import Annex.UUID @@ -84,11 +85,7 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Just (getWebUrls info) , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs index cf65634b04..b197edca2a 100644 --- a/Remote/Tahoe.hs +++ b/Remote/Tahoe.hs @@ -34,6 +34,7 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special +import Remote.Helper.Export import Annex.UUID import Annex.Content import Logs.RemoteState @@ -75,11 +76,7 @@ gen r u c gc = do , lockContent = Nothing , checkPresent = checkKey u hdl , checkPresentCheap = False - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Just (getWhereisKey u) , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/Web.hs b/Remote/Web.hs index 4d55389ec5..45e8d1c229 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -10,6 +10,7 @@ module Remote.Web (remote, getWebUrls) where import Annex.Common import Types.Remote import Remote.Helper.Messages +import Remote.Helper.Export import qualified Git import qualified Git.Construct import Annex.Content @@ -50,11 +51,7 @@ gen r _ c gc = , lockContent = Nothing , checkPresent = checkKey , checkPresentCheap = False - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 9230a027d3..4c9552a6f9 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -28,6 +28,7 @@ import Config.Cost import Remote.Helper.Special import Remote.Helper.Messages import Remote.Helper.Http +import Remote.Helper.Export import qualified Remote.Helper.Chunked.Legacy as Legacy import Creds import Utility.Metered @@ -68,11 +69,7 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost , lockContent = Nothing , checkPresent = checkPresentDummy , checkPresentCheap = False - , storeExport = Nothing - , retrieveExport = Nothing - , removeExport = Nothing - , checkPresentExport = Nothing - , renameExport = Nothing + , exportActions = exportUnsupported , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing diff --git a/Types/Remote.hs b/Types/Remote.hs index 6e78bf2386..169701eccb 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -19,6 +19,7 @@ module Types.Remote , Verification(..) , unVerified , ExportLocation(..) + , ExportActions(..) ) where @@ -63,91 +64,75 @@ instance Eq (RemoteTypeA a) where x == y = typename x == typename y {- An individual remote. -} -data RemoteA a = Remote { +data RemoteA a = Remote -- each Remote has a unique uuid - uuid :: UUID, + { uuid :: UUID -- each Remote has a human visible name - name :: RemoteName, + , name :: RemoteName -- Remotes have a use cost; higher is more expensive - cost :: Cost, + , cost :: Cost -- Transfers a key's contents from disk to the remote. -- The key should not appear to be present on the remote until -- all of its contents have been transferred. - storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool, + , storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool -- Retrieves a key's contents to a file. -- (The MeterUpdate does not need to be used if it writes -- sequentially to the file.) - retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a (Bool, Verification), + , retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a (Bool, Verification) -- Retrieves a key's contents to a tmp file, if it can be done cheaply. -- It's ok to create a symlink or hardlink. - retrieveKeyFileCheap :: Key -> AssociatedFile -> FilePath -> a Bool, + , retrieveKeyFileCheap :: Key -> AssociatedFile -> FilePath -> a Bool -- Removes a key's contents (succeeds if the contents are not present) - removeKey :: Key -> a Bool, + , removeKey :: Key -> a Bool -- Uses locking to prevent removal of a key's contents, -- thus producing a VerifiedCopy, which is passed to the callback. -- If unable to lock, does not run the callback, and throws an -- error. -- This is optional; remotes do not have to support locking. - lockContent :: forall r. Maybe (Key -> (VerifiedCopy -> a r) -> a r), + , lockContent :: forall r. Maybe (Key -> (VerifiedCopy -> a r) -> a r) -- Checks if a key is present in the remote. -- Throws an exception if the remote cannot be accessed. - checkPresent :: Key -> a Bool, + , checkPresent :: Key -> a Bool -- Some remotes can checkPresent without an expensive network -- operation. - checkPresentCheap :: Bool, - - -- Exports content to an ExportLocation. - -- The exported file should not appear to be present on the remote - -- until all of its contents have been transferred. - storeExport :: Maybe (FilePath -> Key -> ExportLocation -> MeterUpdate -> a Bool), - -- Retrieves exported content to a file. - -- (The MeterUpdate does not need to be used if it writes - -- sequentially to the file.) - retrieveExport :: Maybe (Key -> ExportLocation -> FilePath -> MeterUpdate -> a (Bool, Verification)), - -- Removes an exported file (succeeds if the contents are not present) - removeExport :: Maybe (Key -> ExportLocation -> a Bool), - -- Checks if anything is exported to the remote at the specified - -- ExportLocation. - -- Throws an exception if the remote cannot be accessed. - checkPresentExport :: Maybe (Key -> ExportLocation -> a Bool), - -- Renames an already exported file. - renameExport :: Maybe (Key -> ExportLocation -> ExportLocation -> a Bool), - + , checkPresentCheap :: Bool + -- Some remotes support exports of trees. + , exportActions :: ExportActions a -- Some remotes can provide additional details for whereis. - whereisKey :: Maybe (Key -> a [String]), + , whereisKey :: Maybe (Key -> a [String]) -- Some remotes can run a fsck operation on the remote, -- without transferring all the data to the local repo -- The parameters are passed to the fsck command on the remote. - remoteFsck :: Maybe ([CommandParam] -> a (IO Bool)), + , remoteFsck :: Maybe ([CommandParam] -> a (IO Bool)) -- Runs an action to repair the remote's git repository. - repairRepo :: Maybe (a Bool -> a (IO Bool)), + , repairRepo :: Maybe (a Bool -> a (IO Bool)) -- a Remote has a persistent configuration store - config :: RemoteConfig, + , config :: RemoteConfig -- git repo for the Remote - repo :: Git.Repo, + , repo :: Git.Repo -- a Remote's configuration from git - gitconfig :: RemoteGitConfig, + , gitconfig :: RemoteGitConfig -- a Remote can be assocated with a specific local filesystem path - localpath :: Maybe FilePath, + , localpath :: Maybe FilePath -- a Remote can be known to be readonly - readonly :: Bool, + , readonly :: Bool -- a Remote can be globally available. (Ie, "in the cloud".) - availability :: Availability, + , availability :: Availability -- the type of the remote - remotetype :: RemoteTypeA a, + , remotetype :: RemoteTypeA a -- For testing, makes a version of this remote that is not -- available for use. All its actions should fail. - mkUnavailable :: a (Maybe (RemoteA a)), + , mkUnavailable :: a (Maybe (RemoteA a)) -- Information about the remote, for git annex info to display. - getInfo :: a [(String, String)], + , getInfo :: a [(String, String)] -- Some remotes can download from an url (or uri). - claimUrl :: Maybe (URLString -> a Bool), + , claimUrl :: Maybe (URLString -> a Bool) -- Checks that the url is accessible, and gets information about -- its contents, without downloading the full content. -- Throws an exception if the url is inaccessible. - checkUrl :: Maybe (URLString -> a UrlContents) -} + , checkUrl :: Maybe (URLString -> a UrlContents) + } instance Show (RemoteA a) where show remote = "Remote { name =\"" ++ name remote ++ "\" }" @@ -175,3 +160,23 @@ unVerified a = do -- The FilePath will be relative, and may contain unix-style path -- separators. newtype ExportLocation = ExportLocation FilePath + +data ExportActions a = ExportActions + { exportSupported :: a Bool + -- Exports content to an ExportLocation. + -- The exported file should not appear to be present on the remote + -- until all of its contents have been transferred. + , storeExport :: FilePath -> Key -> ExportLocation -> MeterUpdate -> a Bool + -- Retrieves exported content to a file. + -- (The MeterUpdate does not need to be used if it writes + -- sequentially to the file.) + , retrieveExport :: Key -> ExportLocation -> FilePath -> MeterUpdate -> a (Bool, Verification) + -- Removes an exported file (succeeds if the contents are not present) + , removeExport :: Key -> ExportLocation -> a Bool + -- Checks if anything is exported to the remote at the specified + -- ExportLocation. + -- Throws an exception if the remote cannot be accessed. + , checkPresentExport :: Key -> ExportLocation -> a Bool + -- Renames an already exported file. + , renameExport :: Key -> ExportLocation -> ExportLocation -> a Bool + } diff --git a/doc/design/exporting_trees_to_special_remotes.mdwn b/doc/design/exporting_trees_to_special_remotes.mdwn index 7e5700fae0..52a68c6b42 100644 --- a/doc/design/exporting_trees_to_special_remotes.mdwn +++ b/doc/design/exporting_trees_to_special_remotes.mdwn @@ -83,6 +83,10 @@ the [[external_special_remote_protocol]]. Here's the changes to the latter: +* `EXPORTSUPPORTED` + Used to check if a special remote supports exports. The remote + responds with either `EXPORTSUPPORTED-SUCCESS` or + `EXPORTSUPPORTED-FAILURE` * `EXPORT Name` Comes immediately before each of the following requests, specifying the name of the exported file. It will be in the form diff --git a/git-annex.cabal b/git-annex.cabal index a7d0628576..178531de04 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -902,6 +902,7 @@ Executable git-annex Remote.Helper.Chunked Remote.Helper.Chunked.Legacy Remote.Helper.Encryptable + Remote.Helper.Export Remote.Helper.Git Remote.Helper.Hooks Remote.Helper.Http From 28e2cad84989f96699f15ccbc9a48f8a27cccb32 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Sep 2017 12:40:33 -0400 Subject: [PATCH 13/31] implement exporttree=yes configuration * Only export to remotes that were initialized to support it. * Prevent storing key/value on export remotes. * Prevent enabling exporttree=yes and encryption in the same remote. SetupStage Enable was changed to take the old RemoteConfig. This allowed only setting exporttree when initially setting up a remote, and not configuring it later after stuff might already be stored in the remote. Went with =yes rather than =true for consistency with other parts of git-annex. Changed docs accordingly. This commit was supported by the NSF-funded DataLad project. --- Annex/SpecialRemote.hs | 2 +- Assistant/MakeRemote.hs | 4 +-- Command/EnableRemote.hs | 8 ++--- Remote/Directory.hs | 4 +-- Remote/Git.hs | 4 +-- Remote/Glacier.hs | 5 ++-- Remote/Helper/Encryptable.hs | 12 +++++++- Remote/Helper/Export.hs | 30 +++++++++++++++++++ Remote/S3.hs | 5 ++-- Types/Remote.hs | 3 +- .../exporting_trees_to_special_remotes.mdwn | 10 +++---- doc/git-annex-export.mdwn | 2 +- doc/special_remotes/directory.mdwn | 2 +- doc/todo/export.mdwn | 7 ++--- 14 files changed, 69 insertions(+), 29 deletions(-) diff --git a/Annex/SpecialRemote.hs b/Annex/SpecialRemote.hs index f53a2ca638..c215208db2 100644 --- a/Annex/SpecialRemote.hs +++ b/Annex/SpecialRemote.hs @@ -81,7 +81,7 @@ autoEnable = do (Just name, Right t) -> whenM (canenable u) $ do showSideAction $ "Auto enabling special remote " ++ name dummycfg <- liftIO dummyRemoteGitConfig - res <- tryNonAsync $ setup t Enable (Just u) Nothing c dummycfg + res <- tryNonAsync $ setup t (Enable c) (Just u) Nothing c dummycfg case res of Left e -> warning (show e) Right _ -> return () diff --git a/Assistant/MakeRemote.hs b/Assistant/MakeRemote.hs index 57abb86fd0..b98e7f0237 100644 --- a/Assistant/MakeRemote.hs +++ b/Assistant/MakeRemote.hs @@ -52,7 +52,7 @@ makeRsyncRemote name location = makeRemote name location $ const $ void $ go Nothing = setupSpecialRemote name Rsync.remote config Nothing (Nothing, R.Init, Annex.SpecialRemote.newConfig name) go (Just (u, c)) = setupSpecialRemote name Rsync.remote config Nothing - (Just u, R.Enable, c) + (Just u, R.Enable c, c) config = M.fromList [ ("encryption", "shared") , ("rsyncurl", location) @@ -91,7 +91,7 @@ enableSpecialRemote name remotetype mcreds config = do r <- Annex.SpecialRemote.findExisting name case r of Nothing -> error $ "Cannot find a special remote named " ++ name - Just (u, c) -> setupSpecialRemote' False name remotetype config mcreds (Just u, R.Enable, c) + Just (u, c) -> setupSpecialRemote' False name remotetype config mcreds (Just u, R.Enable c, c) setupSpecialRemote :: RemoteName -> RemoteType -> R.RemoteConfig -> Maybe CredPair -> (Maybe UUID, R.SetupStage, R.RemoteConfig) -> Annex RemoteName setupSpecialRemote = setupSpecialRemote' True diff --git a/Command/EnableRemote.hs b/Command/EnableRemote.hs index a2a26009ee..fd830375a4 100644 --- a/Command/EnableRemote.hs +++ b/Command/EnableRemote.hs @@ -81,11 +81,11 @@ startSpecialRemote name config (Just (u, c)) = do gc <- maybe (liftIO dummyRemoteGitConfig) (return . Remote.gitconfig) =<< Remote.byUUID u - next $ performSpecialRemote t u fullconfig gc + next $ performSpecialRemote t u c fullconfig gc -performSpecialRemote :: RemoteType -> UUID -> R.RemoteConfig -> RemoteGitConfig -> CommandPerform -performSpecialRemote t u c gc = do - (c', u') <- R.setup t R.Enable (Just u) Nothing c gc +performSpecialRemote :: RemoteType -> UUID -> R.RemoteConfig -> R.RemoteConfig -> RemoteGitConfig -> CommandPerform +performSpecialRemote t u oldc c gc = do + (c', u') <- R.setup t (R.Enable oldc) (Just u) Nothing c gc next $ cleanupSpecialRemote u' c' cleanupSpecialRemote :: UUID -> R.RemoteConfig -> CommandCleanup diff --git a/Remote/Directory.hs b/Remote/Directory.hs index e2e517b842..6adf6477aa 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -37,14 +37,14 @@ remote = RemoteType { typename = "directory", enumerate = const (findSpecialRemotes "directory"), generate = gen, - setup = directorySetup + setup = exportableRemoteSetup directorySetup } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do cst <- remoteCost gc cheapRemoteCost let chunkconfig = getChunkConfig c - return $ Just $ specialRemote c + return $ Just $ exportableRemote $ specialRemote c (prepareStore dir chunkconfig) (retrieve dir chunkconfig) (simplyPrepare $ remove dir) diff --git a/Remote/Git.hs b/Remote/Git.hs index 129d5e1716..64fb51af83 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -111,7 +111,7 @@ gitSetup Init mu _ c _ = do if isNothing mu || mu == Just u then return (c, u) else error "git remote did not have specified uuid" -gitSetup Enable (Just u) _ c _ = do +gitSetup (Enable _) (Just u) _ c _ = do inRepo $ Git.Command.run [ Param "remote" , Param "add" @@ -119,7 +119,7 @@ gitSetup Enable (Just u) _ c _ = do , Param $ fromMaybe (giveup "no location") (M.lookup "location" c) ] return (c, u) -gitSetup Enable Nothing _ _ _ = error "unable to enable git remote with no specified uuid" +gitSetup (Enable _) Nothing _ _ _ = error "unable to enable git remote with no specified uuid" {- It's assumed to be cheap to read the config of non-URL remotes, so this is - done each time git-annex is run in a way that uses remotes. diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index b21167aaf3..67e1b8b2e0 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -89,8 +89,9 @@ glacierSetup' ss u mcreds c gc = do (c', encsetup) <- encryptionSetup c gc c'' <- setRemoteCredPair encsetup c' gc (AWS.creds u) mcreds let fullconfig = c'' `M.union` defaults - when (ss == Init) $ - genVault fullconfig gc u + case ss of + Init -> genVault fullconfig gc u + _ -> return () gitConfigSpecialRemote u fullconfig "glacier" "true" return (fullconfig, u) where diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index 1fe6d75be5..97e55a4158 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -15,6 +15,7 @@ module Remote.Helper.Encryptable ( embedCreds, cipherKey, extractCipher, + isEncrypted, describeEncryption, ) where @@ -57,7 +58,7 @@ encryptionSetup c gc = do encryption = M.lookup "encryption" c -- Generate a new cipher, depending on the chosen encryption scheme genCipher cmd = case encryption of - _ | M.member "cipher" c || M.member "cipherkeys" c || M.member "pubkeys" c -> cannotchange + _ | hasEncryptionConfig c -> cannotchange Just "none" -> return (c, NoEncryption) Just "shared" -> encsetup $ genSharedCipher cmd -- hybrid encryption is the default when a keyid is @@ -167,6 +168,15 @@ extractCipher c = case (M.lookup "cipher" c, where readkeys = KeyIds . splitc ',' +isEncrypted :: RemoteConfig -> Bool +isEncrypted c = case M.lookup "encryption" c of + Just "none" -> False + Just _ -> True + Nothing -> hasEncryptionConfig c + +hasEncryptionConfig :: RemoteConfig -> Bool +hasEncryptionConfig c = M.member "cipher" c || M.member "cipherkeys" c || M.member "pubkeys" c + describeEncryption :: RemoteConfig -> String describeEncryption c = case extractCipher c of Nothing -> "none" diff --git a/Remote/Helper/Export.hs b/Remote/Helper/Export.hs index d623818e73..9bbbb1f59c 100644 --- a/Remote/Helper/Export.hs +++ b/Remote/Helper/Export.hs @@ -9,7 +9,12 @@ module Remote.Helper.Export where import Annex.Common import Types.Remote +import Types.Creds +import Remote.Helper.Encryptable (isEncrypted) +import qualified Data.Map as M + +-- | Use for remotes that do not support exports. exportUnsupported :: ExportActions Annex exportUnsupported = ExportActions { exportSupported = return False @@ -19,3 +24,28 @@ exportUnsupported = ExportActions , checkPresentExport = \_ _ -> return False , renameExport = \_ _ _ -> return False } + +-- | A remote that supports exports when configured with exporttree=yes, +-- and otherwise does not. +exportableRemote :: Remote -> Remote +exportableRemote r = case M.lookup "exporttree" (config r) of + Just "yes" -> r + { storeKey = \_ _ _ -> do + warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it" + return False + } + _ -> r + { exportActions = exportUnsupported } + +exportableRemoteSetup :: (SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)) -> SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) +exportableRemoteSetup setupaction st mu cp c gc = case st of + Init -> case M.lookup "exporttree" c of + Just "yes" | isEncrypted c -> + giveup "cannot enable both encryption and exporttree" + _ -> cont + Enable oldc + | M.lookup "exporttree" c /= M.lookup "exporttree" oldc -> + giveup "cannot change exporttree of existing special remote" + | otherwise -> cont + where + cont = setupaction st mu cp c gc diff --git a/Remote/S3.hs b/Remote/S3.hs index 341d14b4e7..ffa6a11bbd 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -129,8 +129,9 @@ s3Setup' ss u mcreds c gc (c', encsetup) <- encryptionSetup c gc c'' <- setRemoteCredPair encsetup c' gc (AWS.creds u) mcreds let fullconfig = c'' `M.union` defaults - when (ss == Init) $ - genBucket fullconfig gc u + case ss of + Init -> genBucket fullconfig gc u + _ -> return () use fullconfig archiveorg = do diff --git a/Types/Remote.hs b/Types/Remote.hs index 169701eccb..a0174ebee4 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -44,8 +44,7 @@ type RemoteConfigKey = String type RemoteConfig = M.Map RemoteConfigKey String -data SetupStage = Init | Enable - deriving (Eq) +data SetupStage = Init | Enable RemoteConfig {- There are different types of remotes. -} data RemoteTypeA a = RemoteType { diff --git a/doc/design/exporting_trees_to_special_remotes.mdwn b/doc/design/exporting_trees_to_special_remotes.mdwn index 52a68c6b42..118f12978a 100644 --- a/doc/design/exporting_trees_to_special_remotes.mdwn +++ b/doc/design/exporting_trees_to_special_remotes.mdwn @@ -15,13 +15,13 @@ when they want to export a tree. (It would also be possible to drop all content from an existing special remote and reuse it, but there does not seem much benefit in doing so.) -Add a new `initremote` configuration `exporttree=true`, that cannot be +Add a new `initremote` configuration `exporttree=yes`, that cannot be changed by `enableremote`: - git annex initremote myexport type=... exporttree=true + git annex initremote myexport type=... exporttree=yes -It does not make sense to encrypt an export, so exporttree=true requires -(and can even imply) encryption=false. +It does not make sense to encrypt an export, so exporttree=yes requires +encryption=none. Note that the particular tree to export is not specified yet. This is because the tree that is exported to a special remote may change. @@ -137,7 +137,7 @@ key/value stores. The content of a file can change, and if multiple repositories can export a special remote, they can be out of sync about what files are exported to it. -Possible solution: Make exporttree=true cause the special remote to +Possible solution: Make exporttree=yes cause the special remote to be untrusted, and rely on annex.verify to catch cases where the content of a file on a special remote has changed. This would work well enough except for when the WORM or URL backend is used. So, prevent the user diff --git a/doc/git-annex-export.mdwn b/doc/git-annex-export.mdwn index 96a09dc9b6..abe00f09b6 100644 --- a/doc/git-annex-export.mdwn +++ b/doc/git-annex-export.mdwn @@ -15,7 +15,7 @@ keys. That is great for data storage, but your filenames are obscured. Exporting replicates the tree to the special remote as-is. Mixing key/value and exports in the same remote would be a mess and so is -not allowed. So, you have to configure a remote with `exporttree=true` +not allowed. So, you have to configure a remote with `exporttree=yes` when initially setting it up with [[git-annex-initremote]](1). Repeated exports are done efficiently, by diffing the old and new tree, diff --git a/doc/special_remotes/directory.mdwn b/doc/special_remotes/directory.mdwn index e3f7f1e45f..70610c66de 100644 --- a/doc/special_remotes/directory.mdwn +++ b/doc/special_remotes/directory.mdwn @@ -31,7 +31,7 @@ remote: Do not use for new remotes. It is not safe to change the chunksize setting of an existing remote. -* `exporttree` - Set to "true" to make this special remote usable +* `exporttree` - Set to "yes" to make this special remote usable by [[git-annex-export]]. It will not be usable as a general-purpose special remote. diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index 914febe34c..828e1c55bd 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -17,12 +17,9 @@ there need to be a new interface in supported remotes? Work is in progress. Todo list: -* Only export to remotes that were initialized to support it. -* Prevent using export remotes for key/value storage. * Use retrieveExport when getting from export remotes. + (Needs a map from key to ExportLocation) * Efficient handling of renames. -* Support export to aditional special remotes (S3 etc) -* Support export to external special remotes. * If the same content is present in two different files, export location tracking can be messed up. @@ -33,3 +30,5 @@ Work is in progress. Todo list: And, once one of the files is uploaded, the location log will say the content is present, so the pass over the tree won't try to upload the other file. (See design for a fix for this.) +* Support export to aditional special remotes (S3 etc) +* Support export to external special remotes. From 7eb9889bfd33646f336605c78e956a93cb7e7242 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Sep 2017 13:52:22 -0400 Subject: [PATCH 14/31] track exported files in a sqlite database Went with a separate db per export remote, rather than a single export database. Mostly because there will probably not be a lot of separate export remotes, and it might be convenient to be able to delete a given remote's export database. This commit was supported by the NSF-funded DataLad project. --- Annex/Locations.hs | 10 ++++++ Command/Export.hs | 17 ++++++---- Database/Export.hs | 85 ++++++++++++++++++++++++++++++++++++++++++++++ git-annex.cabal | 1 + 4 files changed, 107 insertions(+), 6 deletions(-) create mode 100644 Database/Export.hs diff --git a/Annex/Locations.hs b/Annex/Locations.hs index 47768b9c10..a5de2e4ff0 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -36,6 +36,7 @@ module Annex.Locations ( gitAnnexFsckDbDir, gitAnnexFsckDbLock, gitAnnexFsckResultsLog, + gitAnnexExportDbDir, gitAnnexScheduleState, gitAnnexTransferDir, gitAnnexCredsDir, @@ -290,6 +291,15 @@ gitAnnexFsckDbLock u r = gitAnnexFsckDir u r "fsck.lck" gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath gitAnnexFsckResultsLog u r = gitAnnexDir r "fsckresults" fromUUID u +{- .git/annex/export/uuid/ is used to store information about + - exports to special remotes. -} +gitAnnexExportDir :: UUID -> Git.Repo -> FilePath +gitAnnexExportDir u r = gitAnnexDir r "export" fromUUID u + +{- Directory containing database used to record export info. -} +gitAnnexExportDbDir :: UUID -> Git.Repo -> FilePath +gitAnnexExportDbDir u r = gitAnnexExportDir u r "db" + {- .git/annex/schedulestate is used to store information about when - scheduled jobs were last run. -} gitAnnexScheduleState :: Git.Repo -> FilePath diff --git a/Command/Export.hs b/Command/Export.hs index 03d549cbf8..1f293025bf 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -21,6 +21,7 @@ import Annex.Content import Annex.CatFile import Logs.Location import Logs.Export +import Database.Export import Messages.Progress import Utility.Tmp @@ -81,6 +82,8 @@ seek o = do when (length old > 1) $ warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.." + db <- openDb (uuid r) + -- First, diff the old and new trees and delete all changed -- files in the export. Every file that remains in the export will -- have the content from the new treeish. @@ -89,7 +92,7 @@ seek o = do forM_ old $ \oldtreesha -> do (diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive oldtreesha new - seekActions $ pure $ map (startUnexport r) diff + seekActions $ pure $ map (startUnexport r db) diff void $ liftIO cleanup -- Waiting until now to record the export guarantees that, @@ -102,12 +105,13 @@ seek o = do -- Export everything that is not yet exported. (l, cleanup') <- inRepo $ Git.LsTree.lsTree new - seekActions $ pure $ map (startExport r) l + seekActions $ pure $ map (startExport r db) l void $ liftIO cleanup' -startExport :: Remote -> Git.LsTree.TreeItem -> CommandStart -startExport r ti = do +startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart +startExport r db ti = do ek <- exportKey (Git.LsTree.sha ti) + liftIO $ addExportLocation db (asKey ek) loc stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do showStart "export" f next $ performExport r ek (Git.LsTree.sha ti) loc @@ -144,11 +148,12 @@ cleanupExport r ek = do logChange (asKey ek) (uuid r) InfoPresent return True -startUnexport :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart -startUnexport r diff +startUnexport :: Remote -> ExportHandle -> Git.DiffTree.DiffTreeItem -> CommandStart +startUnexport r db diff | Git.DiffTree.srcsha diff /= nullSha = do showStart "unexport" f oldk <- exportKey (Git.DiffTree.srcsha diff) + liftIO $ removeExportLocation db (asKey oldk) loc next $ performUnexport r oldk loc | otherwise = stop where diff --git a/Database/Export.hs b/Database/Export.hs new file mode 100644 index 0000000000..bc79af29fe --- /dev/null +++ b/Database/Export.hs @@ -0,0 +1,85 @@ +{- Sqlite database used for exports to special remotes. + - + - Copyright 2017 Joey Hess + -: + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-} +{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-} +{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-} +{-# LANGUAGE RankNTypes #-} + +module Database.Export ( + ExportHandle, + openDb, + closeDb, + addExportLocation, + removeExportLocation, + getExportLocation, + ExportedId, +) where + +import Database.Types +import qualified Database.Queue as H +import Database.Init +import Annex.Locations +import Annex.Common hiding (delete) +import Types.Remote (ExportLocation(..)) + +import Database.Persist.TH +import Database.Esqueleto hiding (Key) + +data ExportHandle = ExportHandle H.DbQueue + +share [mkPersist sqlSettings, mkMigrate "migrateExport"] [persistLowerCase| +Exported + key IKey + file SFilePath + KeyFileIndex key file + UniqueKey key +|] + +{- Opens the database, creating it if it doesn't exist yet. -} +openDb :: UUID -> Annex ExportHandle +openDb u = do + dbdir <- fromRepo (gitAnnexExportDbDir u) + let db = dbdir "db" + unlessM (liftIO $ doesFileExist db) $ do + initDb db $ void $ + runMigrationSilent migrateExport + h <- liftIO $ H.openDbQueue db "exported" + return $ ExportHandle h + +closeDb :: ExportHandle -> Annex () +closeDb (ExportHandle h) = liftIO $ H.closeDbQueue h + +queueDb :: ExportHandle -> SqlPersistM () -> IO () +queueDb (ExportHandle h) = H.queueDb h checkcommit + where + -- commit queue after 1000 changes + checkcommit sz _lastcommittime + | sz > 1000 = return True + | otherwise = return False + +addExportLocation :: ExportHandle -> Key -> ExportLocation -> IO () +addExportLocation h k (ExportLocation f) = queueDb h $ + void $ insertUnique $ Exported (toIKey k) (toSFilePath f) + +removeExportLocation :: ExportHandle -> Key -> ExportLocation -> IO () +removeExportLocation h k (ExportLocation f) = queueDb h $ + delete $ from $ \r -> do + where_ (r ^. ExportedKey ==. val ik &&. r ^. ExportedFile ==. val ef) + where + ik = toIKey k + ef = toSFilePath f + +{- Doesn't know about recently queued changes. -} +getExportLocation :: ExportHandle -> Key -> IO [ExportLocation] +getExportLocation (ExportHandle h) k = H.queryDbQueue h $ do + l <- select $ from $ \r -> do + where_ (r ^. ExportedKey ==. val ik) + return (r ^. ExportedFile) + return $ map (ExportLocation . fromSFilePath . unValue) l + where + ik = toIKey k diff --git a/git-annex.cabal b/git-annex.cabal index 178531de04..af31207bda 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -787,6 +787,7 @@ Executable git-annex Config.GitConfig Creds Crypto + Database.Export Database.Fsck Database.Handle Database.Init From 42eaa340fe7bab5e54ba6bc19a4272cdc10f54bf Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Sep 2017 13:55:49 -0400 Subject: [PATCH 15/31] remove some backtraces on user errors --- Command/Export.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Command/Export.hs b/Command/Export.hs index 1f293025bf..38bda459c1 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -71,9 +71,9 @@ seek :: ExportOptions -> CommandSeek seek o = do r <- getParsed (exportRemote o) unlessM (exportSupported (exportActions r)) $ - error "That remote does not support exports." + giveup "That remote does not support exports." - new <- fromMaybe (error "unknown tree") <$> + new <- fromMaybe (giveup "unknown tree") <$> -- Dereference the tree pointed to by the branch, commit, -- or tag. inRepo (Git.Ref.tree (exportTreeish o)) From 2c90ed1feaac6948e7042882ca444595beb76cd5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Sep 2017 14:00:54 -0400 Subject: [PATCH 16/31] flush queued changes to export db on exit --- Command/Export.hs | 2 ++ Database/Export.hs | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Command/Export.hs b/Command/Export.hs index 38bda459c1..c09253dc9e 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -108,6 +108,8 @@ seek o = do seekActions $ pure $ map (startExport r db) l void $ liftIO cleanup' + closeDb db + startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart startExport r db ti = do ek <- exportKey (Git.LsTree.sha ti) diff --git a/Database/Export.hs b/Database/Export.hs index bc79af29fe..e2986d0756 100644 --- a/Database/Export.hs +++ b/Database/Export.hs @@ -30,7 +30,7 @@ import Types.Remote (ExportLocation(..)) import Database.Persist.TH import Database.Esqueleto hiding (Key) -data ExportHandle = ExportHandle H.DbQueue +newtype ExportHandle = ExportHandle H.DbQueue share [mkPersist sqlSettings, mkMigrate "migrateExport"] [persistLowerCase| Exported From 656797b4e802aba352a06aacdffbe73ed5540879 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Sep 2017 14:25:00 -0400 Subject: [PATCH 17/31] update for export --- CHANGELOG | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index cd2a9d0253..3e168de9ff 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,10 @@ git-annex (6.20170819) UNRELEASED; urgency=medium + * git-annex export: New command, can create and efficiently update + exports of trees to special remotes. + * Use git-annex initremote with exporttree=yes to set up a special remote + for use by git-annex export. + * Implemented export to directory special remotes. * Support building with feed-1.0, while still supporting older versions. * init: Display an additional message when it detects a filesystem that allows writing to files whose write bit is not set. From 4da763439b6c4f296005a4ec53443fd3e1b178cf Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Sep 2017 14:33:09 -0400 Subject: [PATCH 18/31] use export db to correctly handle duplicate files Removed uncorrect UniqueKey key in db schema; a key can appear multiple times with different files. The database has to be flushed after each removal. But when adding files to the export, lots of changes are able to be queued up w/o flushing. So it's still fairly efficient. If large removals of files from exports are too slow, an alternative would be to make two passes over the diff, one pass queueing deletions from the database, then a flush and the a second pass updating the location log. But that would use more memory, and need to look up exportKey twice per removed file, so I've avoided such optimisation yet. This commit was supported by the NSF-funded DataLad project. --- Command/Export.hs | 41 +++++++++++-------- Database/Export.hs | 7 +++- Types/Remote.hs | 1 + .../exporting_trees_to_special_remotes.mdwn | 3 +- doc/todo/export.mdwn | 11 ----- 5 files changed, 31 insertions(+), 32 deletions(-) diff --git a/Command/Export.hs b/Command/Export.hs index c09253dc9e..3387a14ad0 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -113,21 +113,20 @@ seek o = do startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart startExport r db ti = do ek <- exportKey (Git.LsTree.sha ti) - liftIO $ addExportLocation db (asKey ek) loc - stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do + stopUnless (liftIO $ notElem loc <$> getExportLocation db (asKey ek)) $ do showStart "export" f - next $ performExport r ek (Git.LsTree.sha ti) loc + next $ performExport r db ek (Git.LsTree.sha ti) loc where loc = ExportLocation $ toInternalGitPath f f = getTopFilePath $ Git.LsTree.file ti -performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform -performExport r ek contentsha loc = do +performExport :: Remote -> ExportHandle -> ExportKey -> Sha -> ExportLocation -> CommandPerform +performExport r db ek contentsha loc = do let storer = storeExport $ exportActions r sent <- case ek of AnnexKey k -> ifM (inAnnex k) ( metered Nothing k $ \m -> do - let rollback = void $ performUnexport r ek loc + let rollback = void $ performUnexport r db ek loc sendAnnex k rollback (\f -> storer f k loc m) , do @@ -142,11 +141,12 @@ performExport r ek contentsha loc = do liftIO $ hClose h storer tmp sha1k loc m if sent - then next $ cleanupExport r ek + then next $ cleanupExport r db ek loc else stop -cleanupExport :: Remote -> ExportKey -> CommandCleanup -cleanupExport r ek = do +cleanupExport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> CommandCleanup +cleanupExport r db ek loc = do + liftIO $ addExportLocation db (asKey ek) loc logChange (asKey ek) (uuid r) InfoPresent return True @@ -154,23 +154,28 @@ startUnexport :: Remote -> ExportHandle -> Git.DiffTree.DiffTreeItem -> CommandS startUnexport r db diff | Git.DiffTree.srcsha diff /= nullSha = do showStart "unexport" f - oldk <- exportKey (Git.DiffTree.srcsha diff) - liftIO $ removeExportLocation db (asKey oldk) loc - next $ performUnexport r oldk loc + ek <- exportKey (Git.DiffTree.srcsha diff) + next $ performUnexport r db ek loc | otherwise = stop where loc = ExportLocation $ toInternalGitPath f f = getTopFilePath $ Git.DiffTree.file diff -performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform -performUnexport r ek loc = do +performUnexport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> CommandPerform +performUnexport r db ek loc = do let remover = removeExport $ exportActions r ok <- remover (asKey ek) loc if ok - then next $ cleanupUnexport r ek + then next $ cleanupUnexport r db ek loc else stop -cleanupUnexport :: Remote -> ExportKey -> CommandCleanup -cleanupUnexport r ek = do - logChange (asKey ek) (uuid r) InfoMissing +cleanupUnexport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> CommandCleanup +cleanupUnexport r db ek loc = do + liftIO $ do + removeExportLocation db (asKey ek) loc + -- Flush so that getExportLocation sees this and any + -- other removals of the key. + flushDbQueue db + whenM (liftIO $ null <$> getExportLocation db (asKey ek)) $ + logChange (asKey ek) (uuid r) InfoMissing return True diff --git a/Database/Export.hs b/Database/Export.hs index e2986d0756..dcef88854c 100644 --- a/Database/Export.hs +++ b/Database/Export.hs @@ -16,6 +16,7 @@ module Database.Export ( closeDb, addExportLocation, removeExportLocation, + flushDbQueue, getExportLocation, ExportedId, ) where @@ -37,7 +38,6 @@ Exported key IKey file SFilePath KeyFileIndex key file - UniqueKey key |] {- Opens the database, creating it if it doesn't exist yet. -} @@ -74,7 +74,10 @@ removeExportLocation h k (ExportLocation f) = queueDb h $ ik = toIKey k ef = toSFilePath f -{- Doesn't know about recently queued changes. -} +flushDbQueue :: ExportHandle -> IO () +flushDbQueue (ExportHandle h) = H.flushDbQueue h + +{- Note that this does not see recently queued changes. -} getExportLocation :: ExportHandle -> Key -> IO [ExportLocation] getExportLocation (ExportHandle h) k = H.queryDbQueue h $ do l <- select $ from $ \r -> do diff --git a/Types/Remote.hs b/Types/Remote.hs index a0174ebee4..81f1dbe23c 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -159,6 +159,7 @@ unVerified a = do -- The FilePath will be relative, and may contain unix-style path -- separators. newtype ExportLocation = ExportLocation FilePath + deriving (Eq) data ExportActions a = ExportActions { exportSupported :: a Bool diff --git a/doc/design/exporting_trees_to_special_remotes.mdwn b/doc/design/exporting_trees_to_special_remotes.mdwn index 118f12978a..7ff1df870a 100644 --- a/doc/design/exporting_trees_to_special_remotes.mdwn +++ b/doc/design/exporting_trees_to_special_remotes.mdwn @@ -147,7 +147,8 @@ remotes, don't let it be turned off. The same file contents may be in a treeish multiple times under different filenames. That complicates using location tracking. One file may have been exported and the other not, and location tracking says that the content -is present in the export. +is present in the export. A sqlite database is needed to keep track of +this. ## recording exported filenames in git-annex branch diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index 828e1c55bd..99877423b1 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -18,17 +18,6 @@ there need to be a new interface in supported remotes? Work is in progress. Todo list: * Use retrieveExport when getting from export remotes. - (Needs a map from key to ExportLocation) * Efficient handling of renames. -* If the same content is present in two different files, export - location tracking can be messed up. - - When one of the files is deleted and - that tree is exported, the location log for the key will be updated - to say it's not present, even though the other file is still present. - - And, once one of the files is uploaded, the location log will - say the content is present, so the pass over the tree won't try to - upload the other file. (See design for a fix for this.) * Support export to aditional special remotes (S3 etc) * Support export to external special remotes. From 662f2a5ee7fcebd1f606d0a3704d8c59787f47ea Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Sep 2017 16:39:56 -0400 Subject: [PATCH 19/31] git annex get from exports Straightforward enough, except for the needed belt-and-suspenders sanity checks to avoid foot shooting due to exports not being key/value stores. * Even when annex.verify=false, always verify from exports. * Only get files from exports that use a backend that supports checksum verification. * Never trust exports, even if the user says to, because then `git annex drop` would drop content if the export seemed to contain a copy. This commit was supported by the NSF-funded DataLad project. --- Annex/Content.hs | 8 +++-- Logs/Trust.hs | 12 +++++-- Remote/Directory.hs | 2 +- Remote/Helper/Export.hs | 66 ++++++++++++++++++++++++++++++++++----- Types/Remote.hs | 2 +- Types/TrustLevel.hs | 2 +- doc/git-annex-export.mdwn | 15 ++++++--- doc/todo/export.mdwn | 4 ++- 8 files changed, 90 insertions(+), 21 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 0001e8ac97..b74b397537 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -354,8 +354,12 @@ shouldVerify :: VerifyConfig -> Annex Bool shouldVerify AlwaysVerify = return True shouldVerify NoVerify = return False shouldVerify DefaultVerify = annexVerify <$> Annex.getGitConfig -shouldVerify (RemoteVerify r) = shouldVerify DefaultVerify - <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r)) +shouldVerify (RemoteVerify r) = + (shouldVerify DefaultVerify + <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))) + -- Export remotes are not key/value stores, so always verify + -- content from them even when verification is disabled. + <||> Types.Remote.exportSupported (Types.Remote.exportActions r) {- Checks if there is enough free disk space to download a key - to its temp file. diff --git a/Logs/Trust.hs b/Logs/Trust.hs index 4f685be917..85b62ed743 100644 --- a/Logs/Trust.hs +++ b/Logs/Trust.hs @@ -65,10 +65,16 @@ trustMap = maybe trustMapLoad return =<< Annex.getState Annex.trustmap trustMapLoad :: Annex TrustMap trustMapLoad = do overrides <- Annex.getState Annex.forcetrust + l <- remoteList + -- Exports are never trusted, since they are not key/value stores. + exports <- filterM (Types.Remote.exportSupported . Types.Remote.exportActions) l + let exportoverrides = M.fromList $ + map (\r -> (Types.Remote.uuid r, UnTrusted)) exports logged <- trustMapRaw - configured <- M.fromList . catMaybes - <$> (map configuredtrust <$> remoteList) - let m = M.union overrides $ M.union configured logged + let configured = M.fromList $ mapMaybe configuredtrust l + let m = M.union exportoverrides $ + M.union overrides $ + M.union configured logged Annex.changeState $ \s -> s { Annex.trustmap = Just m } return m where diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 6adf6477aa..7769eddd29 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -44,7 +44,7 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot gen r u c gc = do cst <- remoteCost gc cheapRemoteCost let chunkconfig = getChunkConfig c - return $ Just $ exportableRemote $ specialRemote c + exportableRemote $ specialRemote c (prepareStore dir chunkconfig) (retrieve dir chunkconfig) (simplyPrepare $ remove dir) diff --git a/Remote/Helper/Export.hs b/Remote/Helper/Export.hs index 9bbbb1f59c..73ebb91417 100644 --- a/Remote/Helper/Export.hs +++ b/Remote/Helper/Export.hs @@ -8,9 +8,15 @@ module Remote.Helper.Export where import Annex.Common +import qualified Annex import Types.Remote import Types.Creds +import Types.Backend +import Types.Key +import Types.TrustLevel +import Backend import Remote.Helper.Encryptable (isEncrypted) +import Database.Export import qualified Data.Map as M @@ -27,15 +33,59 @@ exportUnsupported = ExportActions -- | A remote that supports exports when configured with exporttree=yes, -- and otherwise does not. -exportableRemote :: Remote -> Remote +exportableRemote :: Remote -> Annex (Maybe Remote) exportableRemote r = case M.lookup "exporttree" (config r) of - Just "yes" -> r - { storeKey = \_ _ _ -> do - warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it" - return False - } - _ -> r - { exportActions = exportUnsupported } + Just "yes" -> do + db <- openDb (uuid r) + + return $ Just $ r + -- Storing a key on an export would need a way to + -- look up the file(s) that the currently exported + -- tree uses for a key; there's not currently an + -- inexpensive way to do that (getExportLocation + -- only finds files that have been stored on the + -- export already). + { storeKey = \_ _ _ -> do + warning "remote is configured with exporttree=yes; use `git-annex export` to store content on it" + return False + -- Keys can be retrieved, but since an export + -- is not a true key/value store, the content of + -- the key has to be able to be strongly verified. + , retrieveKeyFile = \k _af dest p -> + if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k)) + then do + locs <- liftIO $ getExportLocation db k + case locs of + [] -> do + warning "unknown export location" + return (False, UnVerified) + (l:_) -> retrieveExport (exportActions r) k l dest p + else do + warning $ "exported content cannot be verified due to using the " ++ formatKeyVariety (keyVariety k) ++ " backend" + return (False, UnVerified) + , retrieveKeyFileCheap = \_ _ _ -> return False + -- Remove all files a key was exported to. + , removeKey = \k -> do + locs <- liftIO $ getExportLocation db k + oks <- forM locs $ \loc -> do + ok <- removeExport (exportActions r) k loc + when ok $ + liftIO $ removeExportLocation db k loc + return ok + liftIO $ flushDbQueue db + return (and oks) + -- Can't lock content on exports, since they're + -- not key/value stores, and someone else could + -- change what's exported to a file at any time. + , lockContent = Nothing + -- Check if any of the files a key was exported + -- to are present. This doesn't guarantee the + -- export contains the right content. + , checkPresent = \k -> + anyM (checkPresentExport (exportActions r) k) + =<< liftIO (getExportLocation db k) + } + _ -> return $ Just $ r { exportActions = exportUnsupported } exportableRemoteSetup :: (SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)) -> SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) exportableRemoteSetup setupaction st mu cp c gc = case st of diff --git a/Types/Remote.hs b/Types/Remote.hs index 81f1dbe23c..46750ee8da 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -159,7 +159,7 @@ unVerified a = do -- The FilePath will be relative, and may contain unix-style path -- separators. newtype ExportLocation = ExportLocation FilePath - deriving (Eq) + deriving (Show, Eq) data ExportActions a = ExportActions { exportSupported :: a Bool diff --git a/Types/TrustLevel.hs b/Types/TrustLevel.hs index 1cc4c662ee..6ec18e5128 100644 --- a/Types/TrustLevel.hs +++ b/Types/TrustLevel.hs @@ -21,7 +21,7 @@ import Types.UUID -- This order may seem backwards, but we generally want to list dead -- remotes last and trusted ones first. data TrustLevel = Trusted | SemiTrusted | UnTrusted | DeadTrusted - deriving (Eq, Enum, Ord, Bounded) + deriving (Eq, Enum, Ord, Bounded, Show) instance Default TrustLevel where def = SemiTrusted diff --git a/doc/git-annex-export.mdwn b/doc/git-annex-export.mdwn index abe00f09b6..c8d8eac9af 100644 --- a/doc/git-annex-export.mdwn +++ b/doc/git-annex-export.mdwn @@ -11,11 +11,11 @@ git annex export `treeish --to remote` Use this command to export a tree of files from a git-annex repository. Normally files are stored on a git-annex special remote named by their -keys. That is great for data storage, but your filenames are obscured. -Exporting replicates the tree to the special remote as-is. +keys. That is great for reliable data storage, but your filenames are +obscured. Exporting replicates the tree to the special remote as-is. -Mixing key/value and exports in the same remote would be a mess and so is -not allowed. So, you have to configure a remote with `exporttree=yes` +Mixing key/value storage and exports in the same remote would be a mess and +so is not allowed. You have to configure a remote with `exporttree=yes` when initially setting it up with [[git-annex-initremote]](1). Repeated exports are done efficiently, by diffing the old and new tree, @@ -24,6 +24,13 @@ and transferring only the changed files. Exports can be interrupted and resumed. However, partially uploaded files will be re-started from the beginning. +Once content has been exported to a remote, commands like `git annex get` +can download content from there the same as from other remotes. However, +since an export is not a key/value store, git-annex has to do more +verification of content downloaded from an export. Some types of keys, +that are not based on checksums, cannot be downloaded from an export. +And, git-annex will never trust an export to retain the content of a key. + # SEE ALSO [[git-annex]](1) diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index 99877423b1..5813cd869e 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -17,7 +17,9 @@ there need to be a new interface in supported remotes? Work is in progress. Todo list: -* Use retrieveExport when getting from export remotes. +* `git annex get --from export` works in the repo that exported to it, + but in another repo, the export db won't be populated, so it won't work. + Maybe just show a useful error message in this case? * Efficient handling of renames. * Support export to aditional special remotes (S3 etc) * Support export to external special remotes. From a1cc9ec0fd75aa900cbc06497d7b3275d83b650d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Sep 2017 16:55:31 -0400 Subject: [PATCH 20/31] add export infication to git-annex info --- Remote/Helper/Export.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Remote/Helper/Export.hs b/Remote/Helper/Export.hs index 73ebb91417..a46f7bd6c4 100644 --- a/Remote/Helper/Export.hs +++ b/Remote/Helper/Export.hs @@ -8,12 +8,10 @@ module Remote.Helper.Export where import Annex.Common -import qualified Annex import Types.Remote import Types.Creds import Types.Backend import Types.Key -import Types.TrustLevel import Backend import Remote.Helper.Encryptable (isEncrypted) import Database.Export @@ -84,6 +82,10 @@ exportableRemote r = case M.lookup "exporttree" (config r) of , checkPresent = \k -> anyM (checkPresentExport (exportActions r) k) =<< liftIO (getExportLocation db k) + , mkUnavailable = return Nothing + , getInfo = do + is <- getInfo r + return (is++[("export", "yes")]) } _ -> return $ Just $ r { exportActions = exportUnsupported } From 1ec3a9eb056344b208930d1e5516a7fc4bab3f01 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Sep 2017 13:04:09 -0400 Subject: [PATCH 21/31] thoughts on handling renames efficiently This gets complicated, but I think this design will work! This commit was supported by the NSF-funded DataLad project. --- .../exporting_trees_to_special_remotes.mdwn | 42 +++++++++++++++---- doc/todo/export.mdwn | 6 ++- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/doc/design/exporting_trees_to_special_remotes.mdwn b/doc/design/exporting_trees_to_special_remotes.mdwn index 7ff1df870a..0469a4fccd 100644 --- a/doc/design/exporting_trees_to_special_remotes.mdwn +++ b/doc/design/exporting_trees_to_special_remotes.mdwn @@ -237,11 +237,37 @@ for the current treeish. (Unless a conflicting export was made from elsewhere, but in that case, the conflict resolution will have to fix up later.) -Efficient resuming can then first check if the location log says the -export contains the content. (If not, transfer a copy.) If the location -log says the export contains the content, use CHECKPRESENTEXPORT to see if -the file exists, and if not transfer a copy. The CHECKPRESENTEXPORT check -deals with the case where the treeish has two files with the same content. -If we have a key-to-files map for the export, then we can skip the -CHECKPRESENTEXPORT check when there's only one file using a key. So, -resuming can be quite efficient. +## handling renames efficiently + +To handle two files that swap names, a temp name is required. + +Difficulty with a temp name is picking a name that won't ever be used by +any exported file. + +Interrupted exports also complicate this. While a name could be picked that +is in neither the old nor the new tree, an export could be interrupted, +leaving the file at the temp name. There needs to be something to clean +that up when the export is resumed, even if it's resumed with a different +tree. + +Could use something like ".git-annex-tmp-content-$key" as the temp name. +This hides it from casual view, which is good, and it's not depedent on the +tree, so no state needs to be maintained to clean it up. Also, using the +key in the name simplifies calculation of complicated renames (eg, renaming +A to B, B to C, C to A) + +Export can first try to rename the temp name of all keys +whose files are added in the diff. Followed by deleting the temp name +of all keys whose files are removed in the diff. That is more renames and +deletes than strictly necessary, but it will statelessly clean up +an interruped export as long as it's run again with the same new tree. + +But, an export of tree B should clean up after +an interrupted export of tree A. Some state is needed to handle this. +Before starting the export of tree A, record it somewhere. Then when +resuming, diff A..B, and rename/delete the temp names of the keys in the +diff. As well as diffing from the last fully exported tree to B and doing +the same rename/delete. + +So, before an export does anything, need to record the tree that's about +to be exported to export.log, not as an exported tree, but as a goal. diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index 5813cd869e..f345534e86 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -19,7 +19,11 @@ Work is in progress. Todo list: * `git annex get --from export` works in the repo that exported to it, but in another repo, the export db won't be populated, so it won't work. - Maybe just show a useful error message in this case? + Maybe just show a useful error message in this case? + However, exporting from one repository and then trying to update the + export from another repository also doesn't work right, because the + export database is not populated. So, seems that the export database needs + to get populated based on the export log in these cases. * Efficient handling of renames. * Support export to aditional special remotes (S3 etc) * Support export to external special remotes. From 0fa948b4027497fccb880099fcf4cd8f7d502a42 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Sep 2017 13:39:33 -0400 Subject: [PATCH 22/31] record incomplete exports in export.log Not yet used, but essential for resuming cleanly. Note that, in normmal operation, only one commit is made to export.log during an export; the incomplete version only gets to the journal and is then overwritten. This commit was supported by the NSF-funded DataLad project. --- Command/Export.hs | 7 +++--- Logs/Export.hs | 56 ++++++++++++++++++++++++++++++++++++---------- doc/internals.mdwn | 15 ++++++++++--- 3 files changed, 60 insertions(+), 18 deletions(-) diff --git a/Command/Export.hs b/Command/Export.hs index 3387a14ad0..878cda8e3b 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -79,9 +79,10 @@ seek o = do inRepo (Git.Ref.tree (exportTreeish o)) old <- getExport (uuid r) + recordExportBeginning (uuid r) new when (length old > 1) $ warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.." - + db <- openDb (uuid r) -- First, diff the old and new trees and delete all changed @@ -89,7 +90,7 @@ seek o = do -- have the content from the new treeish. -- -- (Also, when there was an export conflict, this resolves it.) - forM_ old $ \oldtreesha -> do + forM_ (map exportedTreeish old) $ \oldtreesha -> do (diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive oldtreesha new seekActions $ pure $ map (startUnexport r db) diff @@ -99,7 +100,7 @@ seek o = do -- if this export is interrupted, there are no files left over -- from a previous export, that are not part of this export. recordExport (uuid r) $ ExportChange - { oldTreeish = old + { oldTreeish = map exportedTreeish old , newTreeish = new } diff --git a/Logs/Export.hs b/Logs/Export.hs index 1fd1460fcd..3ba77cd24e 100644 --- a/Logs/Export.hs +++ b/Logs/Export.hs @@ -14,22 +14,29 @@ import qualified Annex.Branch import qualified Git import qualified Git.Branch import Git.Tree +import Git.Sha import Git.FilePath import Logs import Logs.UUIDBased import Annex.UUID --- | Get the treeish that was exported to a special remote. +data Exported = Exported + { exportedTreeish :: Git.Ref + , incompleteExportedTreeish :: [Git.Ref] + } + deriving (Eq) + +-- | Get what's been exported to a special remote. -- -- If the list contains multiple items, there was an export conflict, -- and different trees were exported to the same special remote. -getExport :: UUID -> Annex [Git.Ref] +getExport :: UUID -> Annex [Exported] getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap . parseLogNew parseExportLog <$> Annex.Branch.get exportLog where - get (ExportLog t u) - | u == remoteuuid = Just t + get (ExportLog exported u) + | u == remoteuuid = Just exported | otherwise = Nothing data ExportChange = ExportChange @@ -39,6 +46,10 @@ data ExportChange = ExportChange -- | Record a change in what's exported to a special remote. -- +-- This is called before an export begins uploading new files to the +-- remote, but after it's cleaned up any files that need to be deleted +-- from the old treeish. +-- -- Any entries in the log for the oldTreeish will be updated to the -- newTreeish. This way, when multiple repositories are exporting to -- the same special remote, there's no conflict as long as they move @@ -50,27 +61,48 @@ recordExport :: UUID -> ExportChange -> Annex () recordExport remoteuuid ec = do c <- liftIO currentVectorClock u <- getUUID - let val = ExportLog (newTreeish ec) remoteuuid + let val = ExportLog (Exported (newTreeish ec) []) remoteuuid Annex.Branch.change exportLog $ showLogNew formatExportLog . changeLog c u val . M.mapWithKey (updateothers c u) . parseLogNew parseExportLog - graftTreeish (newTreeish ec) where - updateothers c u theiru le@(LogEntry _ (ExportLog t remoteuuid')) + updateothers c u theiru le@(LogEntry _ (ExportLog exported@(Exported { exportedTreeish = t }) remoteuuid')) | u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le - | otherwise = LogEntry c (ExportLog (newTreeish ec) theiru) + | otherwise = LogEntry c (ExportLog (exported { exportedTreeish = newTreeish ec }) theiru) -data ExportLog = ExportLog Git.Ref UUID +-- | Record the beginning of an export, to allow cleaning up from +-- interrupted exports. +-- +-- This is called before any changes are made to the remote. +recordExportBeginning :: UUID -> Git.Ref -> Annex () +recordExportBeginning remoteuuid newtree = do + c <- liftIO currentVectorClock + u <- getUUID + ExportLog old _ <- fromMaybe (ExportLog (Exported emptyTree []) remoteuuid) + . M.lookup u . simpleMap + . parseLogNew parseExportLog + <$> Annex.Branch.get exportLog + let new = old { incompleteExportedTreeish = newtree:incompleteExportedTreeish old } + Annex.Branch.change exportLog $ + showLogNew formatExportLog + . changeLog c u (ExportLog new remoteuuid) + . parseLogNew parseExportLog + graftTreeish newtree + +data ExportLog = ExportLog Exported UUID formatExportLog :: ExportLog -> String -formatExportLog (ExportLog treeish remoteuuid) = - Git.fromRef treeish ++ " " ++ fromUUID remoteuuid +formatExportLog (ExportLog exported remoteuuid) = unwords $ + [ Git.fromRef (exportedTreeish exported) + , fromUUID remoteuuid + ] ++ map Git.fromRef (incompleteExportedTreeish exported) parseExportLog :: String -> Maybe ExportLog parseExportLog s = case words s of - (t:u:[]) -> Just $ ExportLog (Git.Ref t) (toUUID u) + (et:u:it) -> Just $ + ExportLog (Exported (Git.Ref et) (map Git.Ref it)) (toUUID u) _ -> Nothing -- To prevent git-annex branch merge conflicts, the treeish is diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 4b24ce443a..ccf1e09b64 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -187,12 +187,21 @@ Tracks what trees have been exported to special remotes by Each line starts with a timestamp, then the uuid of the repository that exported to the special remote, followed by the sha1 of the tree -that was exported, and then by the uuid of the special remote. For example: +that was exported, and then by the uuid of the special remote. - 1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55 +There can also be subsequent sha1s, of trees that have started to be +exported but whose export is not yet complete. The sha1 of the exported +tree can be the empty tree (4b825dc642cb6eb9a060e54bf8d69288fbee4904) +in order to record the beginning of the first export. + +For example: + + 1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 4b825dc642cb6eb9a060e54bf8d69288fbee4904 26339d22-446b-11e0-9101-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b + 1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55 + 1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55 -(The exported tree is also grafted into the git-annex branch, at +(The trees are also grafted into the git-annex branch, at `export.tree`, to prevent git from garbage collecting it. However, the head of the git-annex branch should never contain such a grafted in tree; the grafted tree is removed in the same commit that updates `export.log`.) From cae3704a4464bad015ee5dc20580df4388915a87 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Sep 2017 15:33:40 -0400 Subject: [PATCH 23/31] export file renaming This is seriously super hairy. It has to handle interrupted exports, which may be resumed with the same or a different tree. It also has to recover from export conflicts, which could cause the wrong content to be renamed to a file. I think this works, or is close to working. See the update to the design for how it works. This is definitely not optimal, in that it does more renames than are necessary. It would probably be worth finding the keys that are really renamed and only renaming those. But let's get the "simple" approach to work first.. This commit was supported by the NSF-funded DataLad project. --- Command/Export.hs | 165 ++++++++++++++---- Types/Remote.hs | 2 + .../exporting_trees_to_special_remotes.mdwn | 44 ++++- doc/git-annex-export.mdwn | 19 ++ 4 files changed, 189 insertions(+), 41 deletions(-) diff --git a/Command/Export.hs b/Command/Export.hs index 878cda8e3b..6090b26035 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -67,6 +67,12 @@ exportKey sha = mk <$> catKey sha , keyChunkNum = Nothing } +-- To handle renames which swap files, the exported file is first renamed +-- to a stable temporary name based on the key. +exportTempName :: ExportKey -> ExportLocation +exportTempName ek = ExportLocation $ + ".git-annex-tmp-content-" ++ key2file (asKey (ek)) + seek :: ExportOptions -> CommandSeek seek o = do r <- getParsed (exportRemote o) @@ -78,23 +84,51 @@ seek o = do -- or tag. inRepo (Git.Ref.tree (exportTreeish o)) old <- getExport (uuid r) - recordExportBeginning (uuid r) new - when (length old > 1) $ - warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.." - db <- openDb (uuid r) - -- First, diff the old and new trees and delete all changed - -- files in the export. Every file that remains in the export will - -- have the content from the new treeish. + -- Clean up after incomplete export of a tree, in which + -- the next block of code below may have renamed some files to + -- temp files. Diff from the incomplete tree to the new tree, + -- and delete any temp files that the new tree can't use. + forM_ (concatMap incompleteExportedTreeish old) $ \incomplete -> + mapdiff (startUnexportTempName r db . Git.DiffTree.srcsha) incomplete new + + -- Diff the old and new trees, and delete or rename to new name all + -- changed files in the export. After this, every file that remains + -- in the export will have the content from the new treeish. -- -- (Also, when there was an export conflict, this resolves it.) - forM_ (map exportedTreeish old) $ \oldtreesha -> do - (diff, cleanup) <- inRepo $ - Git.DiffTree.diffTreeRecursive oldtreesha new - seekActions $ pure $ map (startUnexport r db) diff - void $ liftIO cleanup + case map exportedTreeish old of + [] -> return () + [oldtreesha] -> do + -- Rename all old files to temp. + mapdiff + (\diff -> startMoveToTempName r db (Git.DiffTree.file diff) (Git.DiffTree.srcsha diff)) + oldtreesha new + -- Rename from temp to new files. + mapdiff (\diff -> startMoveFromTempName r db (Git.DiffTree.dstsha diff) (Git.DiffTree.file diff)) + new oldtreesha + -- Remove all remaining temps. + mapdiff + (startUnexportTempName r db . Git.DiffTree.srcsha) + oldtreesha new + ts -> do + warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.." + forM_ ts $ \oldtreesha -> do + -- Unexport both the srcsha and the dstsha, + -- because the wrong content may have + -- been renamed to the dstsha due to the + -- export conflict. + let unexportboth d = + [ Git.DiffTree.srcsha d + , Git.DiffTree.dstsha d + ] + -- Don't rename to temp, because the + -- content is unknown; unexport instead. + mapdiff + (\diff -> startUnexport r db (Git.DiffTree.file diff) (unexportboth diff)) + oldtreesha new -- Waiting until now to record the export guarantees that, -- if this export is interrupted, there are no files left over @@ -110,6 +144,12 @@ seek o = do void $ liftIO cleanup' closeDb db + where + mapdiff a oldtreesha newtreesha = do + (diff, cleanup) <- inRepo $ + Git.DiffTree.diffTreeRecursive oldtreesha newtreesha + seekActions $ pure $ map a diff + void $ liftIO cleanup startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart startExport r db ti = do @@ -127,7 +167,7 @@ performExport r db ek contentsha loc = do sent <- case ek of AnnexKey k -> ifM (inAnnex k) ( metered Nothing k $ \m -> do - let rollback = void $ performUnexport r db ek loc + let rollback = void $ performUnexport r db [ek] loc sendAnnex k rollback (\f -> storer f k loc m) , do @@ -151,32 +191,89 @@ cleanupExport r db ek loc = do logChange (asKey ek) (uuid r) InfoPresent return True -startUnexport :: Remote -> ExportHandle -> Git.DiffTree.DiffTreeItem -> CommandStart -startUnexport r db diff - | Git.DiffTree.srcsha diff /= nullSha = do - showStart "unexport" f - ek <- exportKey (Git.DiffTree.srcsha diff) - next $ performUnexport r db ek loc - | otherwise = stop +startUnexport :: Remote -> ExportHandle -> TopFilePath -> [Git.Sha] -> CommandStart +startUnexport r db f shas = do + eks <- forM (filter (/= nullSha) shas) exportKey + if null eks + then stop + else do + showStart "unexport" f' + next $ performUnexport r db eks loc where - loc = ExportLocation $ toInternalGitPath f - f = getTopFilePath $ Git.DiffTree.file diff + loc = ExportLocation $ toInternalGitPath f' + f' = getTopFilePath f -performUnexport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> CommandPerform -performUnexport r db ek loc = do - let remover = removeExport $ exportActions r - ok <- remover (asKey ek) loc - if ok - then next $ cleanupUnexport r db ek loc - else stop +performUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandPerform +performUnexport r db eks loc = do + ifM (allM (\ek -> removeExport (exportActions r) (asKey ek) loc) eks) + ( next $ cleanupUnexport r db eks loc + , stop + ) -cleanupUnexport :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> CommandCleanup -cleanupUnexport r db ek loc = do +cleanupUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandCleanup +cleanupUnexport r db eks loc = do liftIO $ do - removeExportLocation db (asKey ek) loc + forM_ eks $ \ek -> + removeExportLocation db (asKey ek) loc -- Flush so that getExportLocation sees this and any -- other removals of the key. flushDbQueue db - whenM (liftIO $ null <$> getExportLocation db (asKey ek)) $ - logChange (asKey ek) (uuid r) InfoMissing + remaininglocs <- liftIO $ + concat <$> forM eks (\ek -> getExportLocation db (asKey ek)) + when (null remaininglocs) $ + forM_ eks $ \ek -> + logChange (asKey ek) (uuid r) InfoMissing + return True + +startUnexportTempName :: Remote -> ExportHandle -> Git.Sha -> CommandStart +startUnexportTempName r db sha + | sha == nullSha = stop + | otherwise = do + ek <- exportKey sha + let loc@(ExportLocation f) = exportTempName ek + stopUnless (liftIO $ elem loc <$> getExportLocation db (asKey ek)) $ do + showStart "unexport" f + next $ performUnexport r db [ek] loc + +startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> Git.Sha -> CommandStart +startMoveToTempName r db f sha + | sha == nullSha = stop + | otherwise = do + ek <- exportKey sha + let tmploc@(ExportLocation tmpf) = exportTempName ek + showStart "rename" (f' ++ " -> " ++ tmpf) + next $ performRename r db ek loc tmploc + where + loc = ExportLocation $ toInternalGitPath f' + f' = getTopFilePath f + +startMoveFromTempName :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart +startMoveFromTempName r db sha f + | sha == nullSha = stop + | otherwise = do + ek <- exportKey sha + stopUnless (liftIO $ elem loc <$> getExportLocation db (asKey ek)) $ do + let tmploc@(ExportLocation tmpf) = exportTempName ek + showStart "rename" (tmpf ++ " -> " ++ f') + next $ performRename r db ek tmploc loc + where + loc = ExportLocation $ toInternalGitPath f' + f' = getTopFilePath f + +performRename :: Remote -> ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandPerform +performRename r db ek src dest = do + ifM (renameExport (exportActions r) (asKey ek) src dest) + ( next $ cleanupRename db ek src dest + -- In case the special remote does not support renaming, + -- unexport the src instead. + , performUnexport r db [ek] src + ) + +cleanupRename :: ExportHandle -> ExportKey -> ExportLocation -> ExportLocation -> CommandCleanup +cleanupRename db ek src dest = do + liftIO $ do + removeExportLocation db (asKey ek) src + addExportLocation db (asKey ek) dest + -- Flush so that getExportLocation sees this. + flushDbQueue db return True diff --git a/Types/Remote.hs b/Types/Remote.hs index 46750ee8da..6f0a312f4f 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -178,5 +178,7 @@ data ExportActions a = ExportActions -- Throws an exception if the remote cannot be accessed. , checkPresentExport :: Key -> ExportLocation -> a Bool -- Renames an already exported file. + -- This may fail, if the file doesn't exist, or the remote does not + -- support renames. , renameExport :: Key -> ExportLocation -> ExportLocation -> a Bool } diff --git a/doc/design/exporting_trees_to_special_remotes.mdwn b/doc/design/exporting_trees_to_special_remotes.mdwn index 0469a4fccd..a8247d2b9b 100644 --- a/doc/design/exporting_trees_to_special_remotes.mdwn +++ b/doc/design/exporting_trees_to_special_remotes.mdwn @@ -205,7 +205,7 @@ a tree that resolves the conflict as they desire (it could be the same as one of the exported trees, or some merge of them or an entirely new tree). The UI to do this can just be another `git annex export $tree --to remote`. To resolve, diff each exported tree in turn against the resolving tree -and delete all files that differ. +and delete all files that differ. Then, upload all missing files. ## when to update export.log for efficient resuming of exports @@ -256,18 +256,48 @@ tree, so no state needs to be maintained to clean it up. Also, using the key in the name simplifies calculation of complicated renames (eg, renaming A to B, B to C, C to A) -Export can first try to rename the temp name of all keys -whose files are added in the diff. Followed by deleting the temp name -of all keys whose files are removed in the diff. That is more renames and +Export can first try to rename all files that are deleted/modified +to their key's temp name (falling back to deleting since not all +special remotes support rename), and then, in a second pass, rename +from the temp name to the new name. Followed by deleting the temp name +of all keys whose files are deleted in the diff. That is more renames and deletes than strictly necessary, but it will statelessly clean up an interruped export as long as it's run again with the same new tree. But, an export of tree B should clean up after an interrupted export of tree A. Some state is needed to handle this. Before starting the export of tree A, record it somewhere. Then when -resuming, diff A..B, and rename/delete the temp names of the keys in the -diff. As well as diffing from the last fully exported tree to B and doing -the same rename/delete. +resuming, diff A..B, and delete the temp names of the keys in the +diff. (Can't rename here, because we don't know what was the content +of a file when an export was interrupted.) So, before an export does anything, need to record the tree that's about to be exported to export.log, not as an exported tree, but as a goal. + +## renames and export conflicts + +What is there's an export conflict going on at the same time that a file +in the export gets renamed? + +Suppose that there are two git repos A and B, each exporting to the same +remote. A and B are not currently communicating. A exports T1 which +contains F. B exports T2, which has a different content for F. + +Then A exports T3, which renames F to G. If that rename is done +on the remote, then A will think it's successfully exported T3, +but G will have F's content from T2, not from T1. + +When A and B reconnect, the export conflict will be detected. +To resolve the export conflict, it says above to: + +> To resolve, diff each exported tree in turn against the resolving tree +> and delete all files that differ. Then, upload all missing files. + +Assume that the resolving tree is T3. So B's export of T2 is diffed against +T3. F differs and is deleted (no change). G differs and is deleted, +which fixes up the problem that the wrong content was renamed to G. +G is missing so gets uploaded. + +So, this works, as long as "delete all files that differ" means it +deletes both old and new files. And as long as conflict resolution does not +itself stash away files in the temp name for later renaming. diff --git a/doc/git-annex-export.mdwn b/doc/git-annex-export.mdwn index c8d8eac9af..e3cbcbd7a6 100644 --- a/doc/git-annex-export.mdwn +++ b/doc/git-annex-export.mdwn @@ -31,6 +31,25 @@ verification of content downloaded from an export. Some types of keys, that are not based on checksums, cannot be downloaded from an export. And, git-annex will never trust an export to retain the content of a key. +# EXPORT CONFLICTS + +If two different git-annex repositories are both exporting different trees +to the same special remote, it's possible for an export conflict to occur. +This leaves the special remote with some files from one tree, and some +files from the other. Files in the special remote may have entirely the +wrong content as well. + +It's not possible for git-annex to detect when making an export will result +in an export conflict. The best way to avoid export conflicts is to either +only ever export to a special remote from a single repository, or to have a +rule about the tree that you export to the special remote. For example, if +you always export origin/master after pushing to origin, then an export +conflict can't happen. + +An export conflict can only be detected after the two git repositories +that produced it get back in sync. Then the next time you run `git annex +export`, it will detect the export conflict, and resolve it. + # SEE ALSO [[git-annex]](1) From 3ccf661d7c209ece6bcbc8f3b0fb6969ddb4c61c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Sep 2017 15:46:35 -0400 Subject: [PATCH 24/31] todo --- doc/git-annex-export.mdwn | 7 ++++--- doc/todo/export.mdwn | 4 ++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/doc/git-annex-export.mdwn b/doc/git-annex-export.mdwn index e3cbcbd7a6..72319a8fcf 100644 --- a/doc/git-annex-export.mdwn +++ b/doc/git-annex-export.mdwn @@ -15,8 +15,9 @@ keys. That is great for reliable data storage, but your filenames are obscured. Exporting replicates the tree to the special remote as-is. Mixing key/value storage and exports in the same remote would be a mess and -so is not allowed. You have to configure a remote with `exporttree=yes` -when initially setting it up with [[git-annex-initremote]](1). +so is not allowed. You have to configure a special remote with +`exporttree=yes` when initially setting it up with +[[git-annex-initremote]](1). Repeated exports are done efficiently, by diffing the old and new tree, and transferring only the changed files. @@ -54,7 +55,7 @@ export`, it will detect the export conflict, and resolve it. [[git-annex]](1) -[[git-annex-export]](1) +[[git-annex-initremote]](1) # AUTHOR diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index f345534e86..24b49ca850 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -17,6 +17,10 @@ there need to be a new interface in supported remotes? Work is in progress. Todo list: +* initremote: Don't allow "exporttree=yes" to be set when the special remote + does not support exports. That would be confusing since the user would + set up a special remote for exports, but `git annex export` to it would + later fail.. * `git annex get --from export` works in the repo that exported to it, but in another repo, the export db won't be populated, so it won't work. Maybe just show a useful error message in this case? From 5cd340ce27603480da2fa70a34cfbb6e5129b37e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Sep 2017 15:48:14 -0400 Subject: [PATCH 25/31] rename bug fix --- Remote/Directory.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 7769eddd29..512ba1cef7 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -261,7 +261,7 @@ checkPresentExportDirectory d _k loc = renameExportDirectory :: FilePath -> Key -> ExportLocation -> ExportLocation -> Annex Bool renameExportDirectory d _k oldloc newloc = liftIO $ catchBoolIO $ do - createDirectoryIfMissing True dest + createDirectoryIfMissing True (takeDirectory dest) renameFile src dest removeExportLocation d oldloc return True From 4f657ba918975aaf72f92b4aacf10879edd52265 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Sep 2017 15:59:02 -0400 Subject: [PATCH 26/31] bugfix --- Command/Export.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Command/Export.hs b/Command/Export.hs index 6090b26035..d397b2defa 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -49,6 +49,7 @@ optParser _ = ExportOptions -- An export includes both annexed files and files stored in git. -- For the latter, a SHA1 key is synthesized. data ExportKey = AnnexKey Key | GitKey Key + deriving (Show) asKey :: ExportKey -> Key asKey (AnnexKey k) = k @@ -108,7 +109,7 @@ seek o = do oldtreesha new -- Rename from temp to new files. mapdiff (\diff -> startMoveFromTempName r db (Git.DiffTree.dstsha diff) (Git.DiffTree.file diff)) - new oldtreesha + oldtreesha new -- Remove all remaining temps. mapdiff (startUnexportTempName r db . Git.DiffTree.srcsha) @@ -252,8 +253,8 @@ startMoveFromTempName r db sha f | sha == nullSha = stop | otherwise = do ek <- exportKey sha - stopUnless (liftIO $ elem loc <$> getExportLocation db (asKey ek)) $ do - let tmploc@(ExportLocation tmpf) = exportTempName ek + let tmploc@(ExportLocation tmpf) = exportTempName ek + stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do showStart "rename" (tmpf ++ " -> " ++ f') next $ performRename r db ek tmploc loc where From 6ab14710fcfb9b4610cd1f0380d092ab4db69180 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Sep 2017 17:07:49 -0400 Subject: [PATCH 27/31] fix consistency bug reading from export database The export database has writes made to it and then expects to read back the same data immediately. But, the way that Database.Handle does writes, in order to support multiple writers, makes that not work, due to caching issues. This resulted in export re-uploading files it had already successfully renamed into place. Fixed by allowing databases to be opened in MultiWriter or SingleWriter mode. The export database only needs to support a single writer; it does not make sense for multiple exports to run at the same time to the same special remote. All other databases still use MultiWriter mode. And by inspection, nothing else in git-annex seems to be relying on being able to immediately query for changes that were just written to the database. This commit was supported by the NSF-funded DataLad project. --- Database/Export.hs | 2 +- Database/Fsck.hs | 2 +- Database/Handle.hs | 65 ++++++++++++++----- Database/Keys.hs | 2 +- Database/Queue.hs | 12 ++-- .../exporting_trees_to_special_remotes.mdwn | 1 + doc/todo/export.mdwn | 4 +- 7 files changed, 65 insertions(+), 23 deletions(-) diff --git a/Database/Export.hs b/Database/Export.hs index dcef88854c..00c6ab2511 100644 --- a/Database/Export.hs +++ b/Database/Export.hs @@ -48,7 +48,7 @@ openDb u = do unlessM (liftIO $ doesFileExist db) $ do initDb db $ void $ runMigrationSilent migrateExport - h <- liftIO $ H.openDbQueue db "exported" + h <- liftIO $ H.openDbQueue H.SingleWriter db "exported" return $ ExportHandle h closeDb :: ExportHandle -> Annex () diff --git a/Database/Fsck.hs b/Database/Fsck.hs index 9affeac856..1ce513dcf9 100644 --- a/Database/Fsck.hs +++ b/Database/Fsck.hs @@ -63,7 +63,7 @@ openDb u = do initDb db $ void $ runMigrationSilent migrateFsck lockFileCached =<< fromRepo (gitAnnexFsckDbLock u) - h <- liftIO $ H.openDbQueue db "fscked" + h <- liftIO $ H.openDbQueue H.MultiWriter db "fscked" return $ FsckHandle h u closeDb :: FsckHandle -> Annex () diff --git a/Database/Handle.hs b/Database/Handle.hs index 7827be7497..f5a0a5ddaa 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -9,6 +9,7 @@ module Database.Handle ( DbHandle, + DbConcurrency(..), openDb, TableName, queryDb, @@ -35,27 +36,49 @@ import System.IO {- A DbHandle is a reference to a worker thread that communicates with - the database. It has a MVar which Jobs are submitted to. -} -data DbHandle = DbHandle (Async ()) (MVar Job) +data DbHandle = DbHandle DbConcurrency (Async ()) (MVar Job) {- Name of a table that should exist once the database is initialized. -} type TableName = String +{- Sqlite only allows a single write to a database at a time; a concurrent + - write will crash. + - + - While a DbHandle serializes concurrent writes from + - multiple threads. But, when a database can be written to by + - multiple processes concurrently, use MultiWriter to make writes + - to the database be done robustly. + - + - The downside of using MultiWriter is that after writing a change to the + - database, the a query using the same DbHandle will not immediately see + - the change! This is because the change is actually written using a + - separate database connection, and caching can prevent seeing the change. + - Also, consider that if multiple processes are writing to a database, + - you can't rely on seeing values you've just written anyway, as another + - process may change them. + - + - When a database can only be written to by a single process, use + - SingleWriter. Changes written to the database will always be immediately + - visible then. + -} +data DbConcurrency = SingleWriter | MultiWriter + {- Opens the database, but does not perform any migrations. Only use - - if the database is known to exist and have the right tables. -} -openDb :: FilePath -> TableName -> IO DbHandle -openDb db tablename = do + - once the database is known to exist and have the right tables. -} +openDb :: DbConcurrency -> FilePath -> TableName -> IO DbHandle +openDb dbconcurrency db tablename = do jobs <- newEmptyMVar worker <- async (workerThread (T.pack db) tablename jobs) -- work around https://github.com/yesodweb/persistent/issues/474 liftIO $ fileEncoding stderr - return $ DbHandle worker jobs + return $ DbHandle dbconcurrency worker jobs {- This is optional; when the DbHandle gets garbage collected it will - auto-close. -} closeDb :: DbHandle -> IO () -closeDb (DbHandle worker jobs) = do +closeDb (DbHandle _ worker jobs) = do putMVar jobs CloseJob wait worker @@ -68,9 +91,12 @@ closeDb (DbHandle worker jobs) = do - Only one action can be run at a time against a given DbHandle. - If called concurrently in the same process, this will block until - it is able to run. + - + - Note that when the DbHandle was opened in MultiWriter mode, recent + - writes may not be seen by queryDb. -} queryDb :: DbHandle -> SqlPersistM a -> IO a -queryDb (DbHandle _ jobs) a = do +queryDb (DbHandle _ _ jobs) a = do res <- newEmptyMVar putMVar jobs $ QueryJob $ liftIO . putMVar res =<< tryNonAsync a @@ -79,9 +105,9 @@ queryDb (DbHandle _ jobs) a = do {- Writes a change to the database. - - - If a database is opened multiple times and there's a concurrent writer, - - the write could fail. Retries repeatedly for up to 10 seconds, - - which should avoid all but the most exceptional problems. + - In MultiWriter mode, catches failure to write to the database, + - and retries repeatedly for up to 10 seconds, which should avoid + - all but the most exceptional problems. -} commitDb :: DbHandle -> SqlPersistM () -> IO () commitDb h wa = robustly Nothing 100 (commitDb' h wa) @@ -97,15 +123,22 @@ commitDb h wa = robustly Nothing 100 (commitDb' h wa) robustly (Just e) (n-1) a commitDb' :: DbHandle -> SqlPersistM () -> IO (Either SomeException ()) -commitDb' (DbHandle _ jobs) a = do +commitDb' (DbHandle MultiWriter _ jobs) a = do res <- newEmptyMVar - putMVar jobs $ ChangeJob $ \runner -> + putMVar jobs $ RobustChangeJob $ \runner -> liftIO $ putMVar res =<< tryNonAsync (runner a) takeMVar res +commitDb' (DbHandle SingleWriter _ jobs) a = do + res <- newEmptyMVar + putMVar jobs $ ChangeJob $ + liftIO . putMVar res =<< tryNonAsync a + takeMVar res + `catchNonAsync` (const $ error "sqlite commit crashed") data Job = QueryJob (SqlPersistM ()) - | ChangeJob ((SqlPersistM () -> IO ()) -> IO ()) + | ChangeJob (SqlPersistM ()) + | RobustChangeJob ((SqlPersistM () -> IO ()) -> IO ()) | CloseJob workerThread :: T.Text -> TableName -> MVar Job -> IO () @@ -127,10 +160,12 @@ workerThread db tablename jobs = Left BlockedIndefinitelyOnMVar -> return () Right CloseJob -> return () Right (QueryJob a) -> a >> loop - -- change is run in a separate database connection + Right (ChangeJob a) -> a >> loop + -- Change is run in a separate database connection -- since sqlite only supports a single writer at a -- time, and it may crash the database connection - Right (ChangeJob a) -> liftIO (a (runSqliteRobustly tablename db)) >> loop + -- that the write is made to. + Right (RobustChangeJob a) -> liftIO (a (runSqliteRobustly tablename db)) >> loop -- like runSqlite, but calls settle on the raw sql Connection. runSqliteRobustly :: TableName -> T.Text -> (SqlPersistM a) -> IO a diff --git a/Database/Keys.hs b/Database/Keys.hs index b9440ac1ad..282da9f941 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -124,7 +124,7 @@ openDb createdb _ = catchPermissionDenied permerr $ withExclusiveLock gitAnnexKe open db (False, False) -> return DbUnavailable where - open db = liftIO $ DbOpen <$> H.openDbQueue db SQL.containedTable + open db = liftIO $ DbOpen <$> H.openDbQueue H.MultiWriter db SQL.containedTable -- If permissions don't allow opening the database, treat it as if -- it does not exist. permerr e = case createdb of diff --git a/Database/Queue.hs b/Database/Queue.hs index 143871079b..f0a2d2b654 100644 --- a/Database/Queue.hs +++ b/Database/Queue.hs @@ -9,6 +9,7 @@ module Database.Queue ( DbQueue, + DbConcurrency(..), openDbQueue, queryDbQueue, closeDbQueue, @@ -35,9 +36,9 @@ data DbQueue = DQ DbHandle (MVar Queue) {- Opens the database queue, but does not perform any migrations. Only use - if the database is known to exist and have the right tables; ie after - running initDb. -} -openDbQueue :: FilePath -> TableName -> IO DbQueue -openDbQueue db tablename = DQ - <$> openDb db tablename +openDbQueue :: DbConcurrency -> FilePath -> TableName -> IO DbQueue +openDbQueue dbconcurrency db tablename = DQ + <$> openDb dbconcurrency db tablename <*> (newMVar =<< emptyQueue) {- This or flushDbQueue must be called, eg at program exit to ensure @@ -60,8 +61,11 @@ flushDbQueue (DQ hdl qvar) = do {- Makes a query using the DbQueue's database connection. - This should not be used to make changes to the database! - - - Queries will not return changes that have been recently queued, + - Queries will not see changes that have been recently queued, - so use with care. + - + - Also, when the database was opened in MultiWriter mode, + - queries may not see changes even after flushDbQueue. -} queryDbQueue :: DbQueue -> SqlPersistM a -> IO a queryDbQueue (DQ hdl _) = queryDb hdl diff --git a/doc/design/exporting_trees_to_special_remotes.mdwn b/doc/design/exporting_trees_to_special_remotes.mdwn index a8247d2b9b..2b5217d959 100644 --- a/doc/design/exporting_trees_to_special_remotes.mdwn +++ b/doc/design/exporting_trees_to_special_remotes.mdwn @@ -273,6 +273,7 @@ of a file when an export was interrupted.) So, before an export does anything, need to record the tree that's about to be exported to export.log, not as an exported tree, but as a goal. +Then on resume, the temp files for that can be cleaned up. ## renames and export conflicts diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index 24b49ca850..abfa520059 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -28,6 +28,8 @@ Work is in progress. Todo list: export from another repository also doesn't work right, because the export database is not populated. So, seems that the export database needs to get populated based on the export log in these cases. -* Efficient handling of renames. +* Currently all modified/deleted files are renamed to temp files, + even when they won't be used. Avoid doing this unless the + temp file will be renamed to the new filename. * Support export to aditional special remotes (S3 etc) * Support export to external special remotes. From 45d30820ac71d61802884047ccedd3ed60c5cb07 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Sep 2017 12:58:40 -0400 Subject: [PATCH 28/31] document new stuff for external special remotes Got rid of RENAMEEXPORT-UNSUPPORTED, no reason not to use RENAMEEXPORT-FAILURE for that. This commit was supported by the NSF-funded DataLad project. --- .../exporting_trees_to_special_remotes.mdwn | 5 +- .../external_special_remote_protocol.mdwn | 54 ++++++++++++++++++- 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/doc/design/exporting_trees_to_special_remotes.mdwn b/doc/design/exporting_trees_to_special_remotes.mdwn index 2b5217d959..6e7cc68dbd 100644 --- a/doc/design/exporting_trees_to_special_remotes.mdwn +++ b/doc/design/exporting_trees_to_special_remotes.mdwn @@ -114,9 +114,8 @@ Here's the changes to the latter: * `RENAMEEXPORT Key NewName` Requests the remote rename a file stored on it from the previously provided Name to the NewName. - The remote responds with `RENAMEEXPORT-SUCCESS`, - `RENAMEEXPORT-FAILURE`, or with `RENAMEEXPORT-UNSUPPORTED` if an efficient - rename cannot be done. + The remote responds with `RENAMEEXPORT-SUCCESS` or with + `RENAMEEXPORT-FAILURE` if an efficient rename cannot be done. To support old external special remote programs that have not been updated to support exports, git-annex will need to handle an `ERROR` response diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn index 87a838bd4d..8a34bb2d7a 100644 --- a/doc/design/external_special_remote_protocol.mdwn +++ b/doc/design/external_special_remote_protocol.mdwn @@ -43,7 +43,8 @@ the version of the protocol it is using. Once it knows the version, git-annex will generally send a message telling the special remote to start up. -(Or it might send a INITREMOTE, so don't hardcode this order.) +(Or it might send an INITREMOTE or EXPORTSUPPORTED, +so don't hardcode this order.) PREPARE @@ -102,7 +103,7 @@ The following requests *must* all be supported by the special remote. So any one-time setup tasks should be done idempotently. * `PREPARE` Tells the remote that it's time to prepare itself to be used. - Only INITREMOTE can come before this. + Only INITREMOTE or EXPORTSUPPORTED can come before this. * `TRANSFER STORE|RETRIEVE Key File` Requests the transfer of a key. For STORE, the File is the file to upload; for RETRIEVE the File is where to store the download. @@ -143,6 +144,46 @@ replying with `UNSUPPORTED-REQUEST` is acceptable. network access. This is not needed when `SETURIPRESENT` is used, since such uris are automatically displayed by `git annex whereis`. +* `EXPORTSUPPORTED` + Used to check if a special remote supports exports. The remote + responds with either `EXPORTSUPPORTED-SUCCESS` or + `EXPORTSUPPORTED-FAILURE`. Note that this request may be made before + or after `PREPARE`. +* `EXPORT Name` + Comes immediately before each of the following export-related requests, + specifying the name of the exported file. It will be in the form + of a relative path, and may contain path separators, whitespace, + and other special characters. +* `TRANSFEREXPORT STORE|RETRIEVE Key File` + Requests the transfer of a File on local disk to or from the previously + provided Name on the special remote. + Note that it's important that, while a file is being stored, + CHECKPRESENTEXPORT not indicate it's present until all the data has + been transferred. + The remote responds with either `TRANSFER-SUCCESS` or + `TRANSFER-FAILURE`, and a remote where exports do not make sense + may always fail. +* `CHECKPRESENTEXPORT Key` + Requests the remote to check if the previously provided Name is present + in it. + The remote responds with `CHECKPRESENT-SUCCESS`, `CHECKPRESENT-FAILURE`, + or `CHECKPRESENT-UNKNOWN`. +* `REMOVEEXPORT Key` + Requests the remote to remove content stored by `TRANSFEREXPORT` + with the previously provided Name. + The remote responds with either `REMOVE-SUCCESS` or + `REMOVE-FAILURE`. + If the content was already not present in the remote, it should + respond with `REMOVE-SUCCESS`. +* `RENAMEEXPORT Key NewName` + Requests the remote rename a file stored on it from the previously + provided Name to the NewName. + The remote responds with `RENAMEEXPORT-SUCCESS` or + `RENAMEEXPORT-FAILURE`. + +To support old external special remote programs that have not been updated +to support exports, git-annex will need to handle an `ERROR` response +when using any of the above. More optional requests may be added, without changing the protocol version, so if an unknown request is seen, reply with `UNSUPPORTED-REQUEST`. @@ -210,6 +251,15 @@ while it's handling a request. stored in the special remote. * `WHEREIS-FAILURE` Indicates that no location is known for a key. +* `EXPORTSUPPORTED-SUCCESS` + Indicates that it makes sense to use this special remote as an export. +* `EXPORTSUPPORTED` + Indicates that it does not make sense to use this special remote as an + export. +* `RENAMEEXPORT-SUCCESS` + Indicates that a `RENAMEEXPORT` was done successfully. +* `RENAMEEXPORT-FAILURE` + Indicates that a `RENAMEEXPORT` failed for whatever reason. * `UNSUPPORTED-REQUEST` Indicates that the special remote does not know how to handle a request. From 16eb2f976c7b217ae23411ef10ec23f0592cc9bc Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Sep 2017 13:45:31 -0400 Subject: [PATCH 29/31] prevent exporttree=yes on remotes that don't support exports Don't allow "exporttree=yes" to be set when the special remote does not support exports. That would be confusing since the user would set up a special remote for exports, but `git annex export` to it would later fail. This commit was supported by the NSF-funded DataLad project. --- Annex/Content.hs | 2 +- Command/Export.hs | 2 +- Logs/Trust.hs | 2 +- Remote.hs | 1 + Remote/BitTorrent.hs | 13 ++++--- Remote/Bup.hs | 13 ++++--- Remote/Ddar.hs | 13 ++++--- Remote/Directory.hs | 18 ++++----- Remote/External.hs | 13 ++++--- Remote/GCrypt.hs | 13 ++++--- Remote/Git.hs | 13 ++++--- Remote/Glacier.hs | 13 ++++--- Remote/Helper/Export.hs | 85 ++++++++++++++++++++++++++--------------- Remote/Hook.hs | 13 ++++--- Remote/List.hs | 8 ++-- Remote/P2P.hs | 13 ++++--- Remote/Rsync.hs | 13 ++++--- Remote/S3.hs | 13 ++++--- Remote/Tahoe.hs | 13 ++++--- Remote/Web.hs | 13 ++++--- Remote/WebDAV.hs | 13 ++++--- Types/Remote.hs | 27 +++++++------ doc/todo/export.mdwn | 4 -- 23 files changed, 186 insertions(+), 145 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index b74b397537..0b665d4dcc 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -359,7 +359,7 @@ shouldVerify (RemoteVerify r) = <&&> pure (remoteAnnexVerify (Types.Remote.gitconfig r))) -- Export remotes are not key/value stores, so always verify -- content from them even when verification is disabled. - <||> Types.Remote.exportSupported (Types.Remote.exportActions r) + <||> Types.Remote.isExportSupported r {- Checks if there is enough free disk space to download a key - to its temp file. diff --git a/Command/Export.hs b/Command/Export.hs index d397b2defa..2cf453ea14 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -77,7 +77,7 @@ exportTempName ek = ExportLocation $ seek :: ExportOptions -> CommandSeek seek o = do r <- getParsed (exportRemote o) - unlessM (exportSupported (exportActions r)) $ + unlessM (isExportSupported r) $ giveup "That remote does not support exports." new <- fromMaybe (giveup "unknown tree") <$> diff --git a/Logs/Trust.hs b/Logs/Trust.hs index 85b62ed743..54cafc9f43 100644 --- a/Logs/Trust.hs +++ b/Logs/Trust.hs @@ -67,7 +67,7 @@ trustMapLoad = do overrides <- Annex.getState Annex.forcetrust l <- remoteList -- Exports are never trusted, since they are not key/value stores. - exports <- filterM (Types.Remote.exportSupported . Types.Remote.exportActions) l + exports <- filterM Types.Remote.isExportSupported l let exportoverrides = M.fromList $ map (\r -> (Types.Remote.uuid r, UnTrusted)) exports logged <- trustMapRaw diff --git a/Remote.hs b/Remote.hs index 877c9f37de..8d826712c1 100644 --- a/Remote.hs +++ b/Remote.hs @@ -53,6 +53,7 @@ module Remote ( checkAvailable, isXMPPRemote, claimingUrl, + isExportSupported, ) where import Data.Ord diff --git a/Remote/BitTorrent.hs b/Remote/BitTorrent.hs index 9a1be1c0ea..37594bd110 100644 --- a/Remote/BitTorrent.hs +++ b/Remote/BitTorrent.hs @@ -36,12 +36,13 @@ import qualified Data.ByteString.Lazy as B #endif remote :: RemoteType -remote = RemoteType { - typename = "bittorrent", - enumerate = list, - generate = gen, - setup = error "not supported" -} +remote = RemoteType + { typename = "bittorrent" + , enumerate = list + , generate = gen + , setup = error "not supported" + , exportSupported = exportUnsupported + } -- There is only one bittorrent remote, and it always exists. list :: Bool -> Annex [Git.Repo] diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 6ff2aa885a..4180cbb7d4 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -35,12 +35,13 @@ import Utility.Metered type BupRepo = String remote :: RemoteType -remote = RemoteType { - typename = "bup", - enumerate = const (findSpecialRemotes "buprepo"), - generate = gen, - setup = bupSetup -} +remote = RemoteType + { typename = "bup" + , enumerate = const (findSpecialRemotes "buprepo") + , generate = gen + , setup = bupSetup + , exportSupported = exportUnsupported + } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index c5d02a4e6a..3949bf5698 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -30,12 +30,13 @@ data DdarRepo = DdarRepo } remote :: RemoteType -remote = RemoteType { - typename = "ddar", - enumerate = const (findSpecialRemotes "ddarrepo"), - generate = gen, - setup = ddarSetup -} +remote = RemoteType + { typename = "ddar" + , enumerate = const (findSpecialRemotes "ddarrepo") + , generate = gen + , setup = ddarSetup + , exportSupported = exportUnsupported + } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 512ba1cef7..22413b7e9e 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -33,18 +33,19 @@ import Utility.Metered import Utility.Tmp remote :: RemoteType -remote = RemoteType { - typename = "directory", - enumerate = const (findSpecialRemotes "directory"), - generate = gen, - setup = exportableRemoteSetup directorySetup -} +remote = RemoteType + { typename = "directory" + , enumerate = const (findSpecialRemotes "directory") + , generate = gen + , setup = directorySetup + , exportSupported = exportIsSupported + } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do cst <- remoteCost gc cheapRemoteCost let chunkconfig = getChunkConfig c - exportableRemote $ specialRemote c + return $ Just $ specialRemote c (prepareStore dir chunkconfig) (retrieve dir chunkconfig) (simplyPrepare $ remove dir) @@ -61,8 +62,7 @@ gen r u c gc = do , checkPresent = checkPresentDummy , checkPresentCheap = True , exportActions = ExportActions - { exportSupported = return True - , storeExport = storeExportDirectory dir + { storeExport = storeExportDirectory dir , retrieveExport = retrieveExportDirectory dir , removeExport = removeExportDirectory dir , checkPresentExport = checkPresentExportDirectory dir diff --git a/Remote/External.hs b/Remote/External.hs index fca60a995f..71a07d3ea7 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -40,12 +40,13 @@ import System.Log.Logger (debugM) import qualified Data.Map as M remote :: RemoteType -remote = RemoteType { - typename = "external", - enumerate = const (findSpecialRemotes "externaltype"), - generate = gen, - setup = externalSetup -} +remote = RemoteType + { typename = "external" + , enumerate = const (findSpecialRemotes "externaltype") + , generate = gen + , setup = externalSetup + , exportSupported = exportUnsupported + } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index dd681a75c7..3270a1dc7e 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -52,14 +52,15 @@ import Utility.Gpg import Utility.SshHost remote :: RemoteType -remote = RemoteType { - typename = "gcrypt", +remote = RemoteType + { typename = "gcrypt" -- Remote.Git takes care of enumerating gcrypt remotes too, -- and will call our gen on them. - enumerate = const (return []), - generate = gen, - setup = gCryptSetup -} + , enumerate = const (return []) + , generate = gen + , setup = gCryptSetup + , exportSupported = exportUnsupported + } chainGen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) chainGen gcryptr u c gc = do diff --git a/Remote/Git.hs b/Remote/Git.hs index 64fb51af83..02957fda29 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -67,12 +67,13 @@ import qualified Data.Map as M import Network.URI remote :: RemoteType -remote = RemoteType { - typename = "git", - enumerate = list, - generate = gen, - setup = gitSetup -} +remote = RemoteType + { typename = "git" + , enumerate = list + , generate = gen + , setup = gitSetup + , exportSupported = exportUnsupported + } list :: Bool -> Annex [Git.Repo] list autoinit = do diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 67e1b8b2e0..40a92c7009 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -30,12 +30,13 @@ type Vault = String type Archive = FilePath remote :: RemoteType -remote = RemoteType { - typename = "glacier", - enumerate = const (findSpecialRemotes "glacier"), - generate = gen, - setup = glacierSetup -} +remote = RemoteType + { typename = "glacier" + , enumerate = const (findSpecialRemotes "glacier") + , generate = gen + , setup = glacierSetup + , exportSupported = exportUnsupported + } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost diff --git a/Remote/Helper/Export.hs b/Remote/Helper/Export.hs index a46f7bd6c4..58533155bb 100644 --- a/Remote/Helper/Export.hs +++ b/Remote/Helper/Export.hs @@ -5,11 +5,12 @@ - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE FlexibleInstances #-} + module Remote.Helper.Export where import Annex.Common import Types.Remote -import Types.Creds import Types.Backend import Types.Key import Backend @@ -19,24 +20,60 @@ import Database.Export import qualified Data.Map as M -- | Use for remotes that do not support exports. -exportUnsupported :: ExportActions Annex -exportUnsupported = ExportActions - { exportSupported = return False - , storeExport = \_ _ _ _ -> return False - , retrieveExport = \_ _ _ _ -> return (False, UnVerified) - , removeExport = \_ _ -> return False - , checkPresentExport = \_ _ -> return False - , renameExport = \_ _ _ -> return False - } +class HasExportUnsupported a where + exportUnsupported :: a --- | A remote that supports exports when configured with exporttree=yes, --- and otherwise does not. -exportableRemote :: Remote -> Annex (Maybe Remote) -exportableRemote r = case M.lookup "exporttree" (config r) of - Just "yes" -> do +instance HasExportUnsupported (RemoteConfig -> RemoteGitConfig -> Annex Bool) where + exportUnsupported = \_ _ -> return False + +instance HasExportUnsupported (ExportActions Annex) where + exportUnsupported = ExportActions + { storeExport = \_ _ _ _ -> return False + , retrieveExport = \_ _ _ _ -> return (False, UnVerified) + , removeExport = \_ _ -> return False + , checkPresentExport = \_ _ -> return False + , renameExport = \_ _ _ -> return False + } + +exportIsSupported :: RemoteConfig -> RemoteGitConfig -> Annex Bool +exportIsSupported = \_ _ -> return True + +-- | Prevent or allow exporttree=yes when setting up a new remote, +-- depending on exportSupported and other configuration. +adjustExportableRemoteType :: RemoteType -> RemoteType +adjustExportableRemoteType rt = rt { setup = setup' } + where + setup' st mu cp c gc = do + let cont = setup rt st mu cp c gc + ifM (exportSupported rt c gc) + ( case st of + Init -> case M.lookup "exporttree" c of + Just "yes" | isEncrypted c -> + giveup "cannot enable both encryption and exporttree" + _ -> cont + Enable oldc + | M.lookup "exporttree" c /= M.lookup "exporttree" oldc -> + giveup "cannot change exporttree of existing special remote" + | otherwise -> cont + , case M.lookup "exporttree" c of + Just "yes" -> giveup "exporttree=yes is not supported by this special remote" + _ -> cont + ) + +-- | If the remote is exportSupported, and exporttree=yes, adjust the +-- remote to be an export. +adjustExportable :: Remote -> Annex Remote +adjustExportable r = case M.lookup "exporttree" (config r) of + Just "yes" -> ifM (isExportSupported r) + ( isexport + , notexport + ) + _ -> notexport + where + notexport = return $ r { exportActions = exportUnsupported } + isexport = do db <- openDb (uuid r) - - return $ Just $ r + return $ r -- Storing a key on an export would need a way to -- look up the file(s) that the currently exported -- tree uses for a key; there's not currently an @@ -87,17 +124,3 @@ exportableRemote r = case M.lookup "exporttree" (config r) of is <- getInfo r return (is++[("export", "yes")]) } - _ -> return $ Just $ r { exportActions = exportUnsupported } - -exportableRemoteSetup :: (SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)) -> SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) -exportableRemoteSetup setupaction st mu cp c gc = case st of - Init -> case M.lookup "exporttree" c of - Just "yes" | isEncrypted c -> - giveup "cannot enable both encryption and exporttree" - _ -> cont - Enable oldc - | M.lookup "exporttree" c /= M.lookup "exporttree" oldc -> - giveup "cannot change exporttree of existing special remote" - | otherwise -> cont - where - cont = setupaction st mu cp c gc diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 5be4339e33..d7c7eb6b82 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -26,12 +26,13 @@ type Action = String type HookName = String remote :: RemoteType -remote = RemoteType { - typename = "hook", - enumerate = const (findSpecialRemotes "hooktype"), - generate = gen, - setup = hookSetup -} +remote = RemoteType + { typename = "hook" + , enumerate = const (findSpecialRemotes "hooktype") + , generate = gen + , setup = hookSetup + , exportSupported = exportUnsupported + } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do diff --git a/Remote/List.hs b/Remote/List.hs index a5e305622f..2dc5e4823a 100644 --- a/Remote/List.hs +++ b/Remote/List.hs @@ -18,6 +18,7 @@ import Types.Remote import Annex.UUID import Remote.Helper.Hooks import Remote.Helper.ReadOnly +import Remote.Helper.Export import qualified Git import qualified Git.Config @@ -42,7 +43,7 @@ import qualified Remote.Hook import qualified Remote.External remoteTypes :: [RemoteType] -remoteTypes = +remoteTypes = map adjustExportableRemoteType [ Remote.Git.remote , Remote.GCrypt.remote , Remote.P2P.remote @@ -100,8 +101,9 @@ remoteGen m t r = do u <- getRepoUUID r gc <- Annex.getRemoteGitConfig r let c = fromMaybe M.empty $ M.lookup u m - mrmt <- generate t r u c gc - return $ adjustReadOnly . addHooks <$> mrmt + generate t r u c gc >>= maybe + (return Nothing) + (Just <$$> adjustExportable . adjustReadOnly . addHooks) {- Updates a local git Remote, re-reading its git config. -} updateRemote :: Remote -> Annex (Maybe Remote) diff --git a/Remote/P2P.hs b/Remote/P2P.hs index f51b73b33e..be0d4589f2 100644 --- a/Remote/P2P.hs +++ b/Remote/P2P.hs @@ -34,14 +34,15 @@ import Control.Concurrent import Control.Concurrent.STM remote :: RemoteType -remote = RemoteType { - typename = "p2p", +remote = RemoteType + { typename = "p2p" -- Remote.Git takes care of enumerating P2P remotes, -- and will call chainGen on them. - enumerate = const (return []), - generate = \_ _ _ _ -> return Nothing, - setup = error "P2P remotes are set up using git-annex p2p" -} + , enumerate = const (return []) + , generate = \_ _ _ _ -> return Nothing + , setup = error "P2P remotes are set up using git-annex p2p" + , exportSupported = exportUnsupported + } chainGen :: P2PAddress -> Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) chainGen addr r u c gc = do diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 33485c78b8..79aebad6b8 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -44,12 +44,13 @@ import Utility.SshHost import qualified Data.Map as M remote :: RemoteType -remote = RemoteType { - typename = "rsync", - enumerate = const (findSpecialRemotes "rsyncurl"), - generate = gen, - setup = rsyncSetup -} +remote = RemoteType + { typename = "rsync" + , enumerate = const (findSpecialRemotes "rsyncurl") + , generate = gen + , setup = rsyncSetup + , exportSupported = exportUnsupported + } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do diff --git a/Remote/S3.hs b/Remote/S3.hs index ffa6a11bbd..4b56cce296 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -54,12 +54,13 @@ import Utility.Url (checkBoth, managerSettings, closeManager) type BucketName = String remote :: RemoteType -remote = RemoteType { - typename = "S3", - enumerate = const (findSpecialRemotes "s3"), - generate = gen, - setup = s3Setup -} +remote = RemoteType + { typename = "S3" + , enumerate = const (findSpecialRemotes "s3") + , generate = gen + , setup = s3Setup + , exportSupported = exportUnsupported + } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs index b197edca2a..d3d52d7de6 100644 --- a/Remote/Tahoe.hs +++ b/Remote/Tahoe.hs @@ -52,12 +52,13 @@ type IntroducerFurl = String type Capability = String remote :: RemoteType -remote = RemoteType { - typename = "tahoe", - enumerate = const (findSpecialRemotes "tahoe"), - generate = gen, - setup = tahoeSetup -} +remote = RemoteType + { typename = "tahoe" + , enumerate = const (findSpecialRemotes "tahoe") + , generate = gen + , setup = tahoeSetup + , exportSupported = exportUnsupported + } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do diff --git a/Remote/Web.hs b/Remote/Web.hs index 45e8d1c229..f3580ca996 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -23,12 +23,13 @@ import Annex.Quvi import qualified Utility.Quvi as Quvi remote :: RemoteType -remote = RemoteType { - typename = "web", - enumerate = list, - generate = gen, - setup = error "not supported" -} +remote = RemoteType + { typename = "web" + , enumerate = list + , generate = gen + , setup = error "not supported" + , exportSupported = exportUnsupported + } -- There is only one web remote, and it always exists. -- (If the web should cease to exist, remove this module and redistribute diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 4c9552a6f9..4cc3c92e03 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -41,12 +41,13 @@ import Network.HTTP.Client (HttpExceptionContent(..), responseStatus) #endif remote :: RemoteType -remote = RemoteType { - typename = "webdav", - enumerate = const (findSpecialRemotes "webdav"), - generate = gen, - setup = webdavSetup -} +remote = RemoteType + { typename = "webdav" + , enumerate = const (findSpecialRemotes "webdav") + , generate = gen + , setup = webdavSetup + , exportSupported = exportUnsupported + } gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = new <$> remoteCost gc expensiveRemoteCost diff --git a/Types/Remote.hs b/Types/Remote.hs index 6f0a312f4f..e2f36a55b6 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -19,6 +19,7 @@ module Types.Remote , Verification(..) , unVerified , ExportLocation(..) + , isExportSupported , ExportActions(..) ) where @@ -36,7 +37,7 @@ import Types.UrlContents import Types.NumCopies import Config.Cost import Utility.Metered -import Git.Types +import Git.Types (RemoteName) import Utility.SafeCommand import Utility.Url @@ -47,17 +48,19 @@ type RemoteConfig = M.Map RemoteConfigKey String data SetupStage = Init | Enable RemoteConfig {- There are different types of remotes. -} -data RemoteTypeA a = RemoteType { +data RemoteTypeA a = RemoteType -- human visible type name - typename :: String, + { typename :: String -- enumerates remotes of this type -- The Bool is True if automatic initialization of remotes is desired - enumerate :: Bool -> a [Git.Repo], - -- generates a remote of this type - generate :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> a (Maybe (RemoteA a)), + , enumerate :: Bool -> a [Git.Repo] + -- generates a remote of this type from the current git config + , generate :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> a (Maybe (RemoteA a)) -- initializes or enables a remote - setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID) -} + , setup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> a (RemoteConfig, UUID) + -- check if a remote of this type is able to support export + , exportSupported :: RemoteConfig -> RemoteGitConfig -> a Bool + } instance Eq (RemoteTypeA a) where x == y = typename x == typename y @@ -161,12 +164,14 @@ unVerified a = do newtype ExportLocation = ExportLocation FilePath deriving (Show, Eq) -data ExportActions a = ExportActions - { exportSupported :: a Bool +isExportSupported :: RemoteA a -> a Bool +isExportSupported r = exportSupported (remotetype r) (config r) (gitconfig r) + +data ExportActions a = ExportActions -- Exports content to an ExportLocation. -- The exported file should not appear to be present on the remote -- until all of its contents have been transferred. - , storeExport :: FilePath -> Key -> ExportLocation -> MeterUpdate -> a Bool + { storeExport :: FilePath -> Key -> ExportLocation -> MeterUpdate -> a Bool -- Retrieves exported content to a file. -- (The MeterUpdate does not need to be used if it writes -- sequentially to the file.) diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index abfa520059..8f5c3f8f1c 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -17,10 +17,6 @@ there need to be a new interface in supported remotes? Work is in progress. Todo list: -* initremote: Don't allow "exporttree=yes" to be set when the special remote - does not support exports. That would be confusing since the user would - set up a special remote for exports, but `git annex export` to it would - later fail.. * `git annex get --from export` works in the repo that exported to it, but in another repo, the export db won't be populated, so it won't work. Maybe just show a useful error message in this case? From a48b52c056328eeb164f1c6f6afd029256730b62 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Sep 2017 14:32:47 -0400 Subject: [PATCH 30/31] avoid renaming to temp files before deleting Only rename when actually ncessary. The diff gets buffered in memory. Probably git has to buffer a diff in memory when generating it as well, so this memory usage should not be a problem, even when the diff is very large. I hope. This commit was supported by the NSF-funded DataLad project. --- Command/Export.hs | 90 ++++++++++++++++++++++++++++++-------------- doc/todo/export.mdwn | 3 -- 2 files changed, 61 insertions(+), 32 deletions(-) diff --git a/Command/Export.hs b/Command/Export.hs index 2cf453ea14..09878dabfc 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -5,6 +5,8 @@ - Licensed under the GNU GPL version 3 or higher. -} +{-# LANGUAGE TupleSections #-} + module Command.Export where import Command @@ -26,6 +28,7 @@ import Messages.Progress import Utility.Tmp import qualified Data.ByteString.Lazy as L +import qualified Data.Map as M cmd :: Command cmd = command "export" SectionCommon @@ -49,7 +52,7 @@ optParser _ = ExportOptions -- An export includes both annexed files and files stored in git. -- For the latter, a SHA1 key is synthesized. data ExportKey = AnnexKey Key | GitKey Key - deriving (Show) + deriving (Show, Eq, Ord) asKey :: ExportKey -> Key asKey (AnnexKey k) = k @@ -103,17 +106,22 @@ seek o = do case map exportedTreeish old of [] -> return () [oldtreesha] -> do - -- Rename all old files to temp. - mapdiff - (\diff -> startMoveToTempName r db (Git.DiffTree.file diff) (Git.DiffTree.srcsha diff)) - oldtreesha new + diffmap <- mkDiffMap oldtreesha new + let seekdiffmap a = seekActions $ pure $ map a (M.toList diffmap) + -- Rename old files to temp, or delete. + seekdiffmap $ \(ek, (moldf, mnewf)) -> + case (moldf, mnewf) of + (Just oldf, Just _newf) -> + startMoveToTempName r db oldf ek + (Just oldf, Nothing) -> + startUnexport' r db oldf ek + _ -> stop -- Rename from temp to new files. - mapdiff (\diff -> startMoveFromTempName r db (Git.DiffTree.dstsha diff) (Git.DiffTree.file diff)) - oldtreesha new - -- Remove all remaining temps. - mapdiff - (startUnexportTempName r db . Git.DiffTree.srcsha) - oldtreesha new + seekdiffmap $ \(ek, (moldf, mnewf)) -> + case (moldf, mnewf) of + (Just _oldf, Just newf) -> + startMoveFromTempName r db ek newf + _ -> stop ts -> do warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.." forM_ ts $ \oldtreesha -> do @@ -126,7 +134,7 @@ seek o = do , Git.DiffTree.dstsha d ] -- Don't rename to temp, because the - -- content is unknown; unexport instead. + -- content is unknown; delete instead. mapdiff (\diff -> startUnexport r db (Git.DiffTree.file diff) (unexportboth diff)) oldtreesha new @@ -152,6 +160,28 @@ seek o = do seekActions $ pure $ map a diff void $ liftIO cleanup +-- Map of old and new filenames for each changed ExportKey in a diff. +type DiffMap = M.Map ExportKey (Maybe TopFilePath, Maybe TopFilePath) + +mkDiffMap :: Git.Ref -> Git.Ref -> Annex DiffMap +mkDiffMap old new = do + (diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive old new + diffmap <- M.fromListWith combinedm . concat <$> forM diff mkdm + void $ liftIO cleanup + return diffmap + where + combinedm (srca, dsta) (srcb, dstb) = (srca <|> srcb, dsta <|> dstb) + mkdm i = do + srcek <- getk (Git.DiffTree.srcsha i) + dstek <- getk (Git.DiffTree.dstsha i) + return $ catMaybes + [ (, (Just (Git.DiffTree.file i), Nothing)) <$> srcek + , (, (Nothing, Just (Git.DiffTree.file i))) <$> dstek + ] + getk sha + | sha == nullSha = return Nothing + | otherwise = Just <$> exportKey sha + startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart startExport r db ti = do ek <- exportKey (Git.LsTree.sha ti) @@ -204,6 +234,14 @@ startUnexport r db f shas = do loc = ExportLocation $ toInternalGitPath f' f' = getTopFilePath f +startUnexport' :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart +startUnexport' r db f ek = do + showStart "unexport" f' + next $ performUnexport r db [ek] loc + where + loc = ExportLocation $ toInternalGitPath f' + f' = getTopFilePath f + performUnexport :: Remote -> ExportHandle -> [ExportKey] -> ExportLocation -> CommandPerform performUnexport r db eks loc = do ifM (allM (\ek -> removeExport (exportActions r) (asKey ek) loc) eks) @@ -236,27 +274,21 @@ startUnexportTempName r db sha showStart "unexport" f next $ performUnexport r db [ek] loc -startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> Git.Sha -> CommandStart -startMoveToTempName r db f sha - | sha == nullSha = stop - | otherwise = do - ek <- exportKey sha - let tmploc@(ExportLocation tmpf) = exportTempName ek - showStart "rename" (f' ++ " -> " ++ tmpf) - next $ performRename r db ek loc tmploc +startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart +startMoveToTempName r db f ek = do + let tmploc@(ExportLocation tmpf) = exportTempName ek + showStart "rename" (f' ++ " -> " ++ tmpf) + next $ performRename r db ek loc tmploc where loc = ExportLocation $ toInternalGitPath f' f' = getTopFilePath f -startMoveFromTempName :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart -startMoveFromTempName r db sha f - | sha == nullSha = stop - | otherwise = do - ek <- exportKey sha - let tmploc@(ExportLocation tmpf) = exportTempName ek - stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do - showStart "rename" (tmpf ++ " -> " ++ f') - next $ performRename r db ek tmploc loc +startMoveFromTempName :: Remote -> ExportHandle -> ExportKey -> TopFilePath -> CommandStart +startMoveFromTempName r db ek f = do + let tmploc@(ExportLocation tmpf) = exportTempName ek + stopUnless (liftIO $ elem tmploc <$> getExportLocation db (asKey ek)) $ do + showStart "rename" (tmpf ++ " -> " ++ f') + next $ performRename r db ek tmploc loc where loc = ExportLocation $ toInternalGitPath f' f' = getTopFilePath f diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index 8f5c3f8f1c..c4e57bd1c1 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -24,8 +24,5 @@ Work is in progress. Todo list: export from another repository also doesn't work right, because the export database is not populated. So, seems that the export database needs to get populated based on the export log in these cases. -* Currently all modified/deleted files are renamed to temp files, - even when they won't be used. Avoid doing this unless the - temp file will be renamed to the new filename. * Support export to aditional special remotes (S3 etc) * Support export to external special remotes. From cd5f405623d31ae64fa0258e5f7c9ec9618fad84 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Sep 2017 15:37:49 -0400 Subject: [PATCH 31/31] interrupted export recovery bugfixes When an export was interrupted, the sqlite database won't have been committed necessarily. Also, the interrupted export might have been run in an entirely different repository. There's not a significant speed benefit in checking getExportLocation in this case anyway, so avoid it. Also, remove the old filename from the export database. Recovery from interrupted exports is now tested working. This commit was supported by the NSF-funded DataLad project. --- Command/Export.hs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/Command/Export.hs b/Command/Export.hs index 09878dabfc..d2ba53dd23 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -96,7 +96,9 @@ seek o = do -- temp files. Diff from the incomplete tree to the new tree, -- and delete any temp files that the new tree can't use. forM_ (concatMap incompleteExportedTreeish old) $ \incomplete -> - mapdiff (startUnexportTempName r db . Git.DiffTree.srcsha) incomplete new + mapdiff (\diff -> startRecoverIncomplete r db (Git.DiffTree.srcsha diff) (Git.DiffTree.file diff)) + incomplete + new -- Diff the old and new trees, and delete or rename to new name all -- changed files in the export. After this, every file that remains @@ -264,15 +266,18 @@ cleanupUnexport r db eks loc = do logChange (asKey ek) (uuid r) InfoMissing return True -startUnexportTempName :: Remote -> ExportHandle -> Git.Sha -> CommandStart -startUnexportTempName r db sha +startRecoverIncomplete :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart +startRecoverIncomplete r db sha oldf | sha == nullSha = stop | otherwise = do ek <- exportKey sha let loc@(ExportLocation f) = exportTempName ek - stopUnless (liftIO $ elem loc <$> getExportLocation db (asKey ek)) $ do - showStart "unexport" f - next $ performUnexport r db [ek] loc + showStart "unexport" f + liftIO $ removeExportLocation db (asKey ek) oldloc + next $ performUnexport r db [ek] loc + where + oldloc = ExportLocation $ toInternalGitPath oldf' + oldf' = getTopFilePath oldf startMoveToTempName :: Remote -> ExportHandle -> TopFilePath -> ExportKey -> CommandStart startMoveToTempName r db f ek = do