diff --git a/Annex/Locations.hs b/Annex/Locations.hs index 947cceef92..f86dfc6f46 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -303,7 +303,7 @@ gitAnnexExportDbDir u r = gitAnnexExportDir u r "db" {- Lock file for export state for a special remote. -} gitAnnexExportLock :: UUID -> Git.Repo -> FilePath -gitAnnexExportLock u r = gitAnnexExportDir u r ++ ".lck" +gitAnnexExportLock u r = gitAnnexExportDbDir u r ++ ".lck" {- .git/annex/schedulestate is used to store information about when - scheduled jobs were last run. -} diff --git a/Command/Export.hs b/Command/Export.hs index 811e2351a0..02c64eadfa 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -27,7 +27,6 @@ import Annex.LockFile import Logs.Location import Logs.Export import Database.Export -import Remote.Helper.Export import Messages.Progress import Utility.Tmp @@ -129,7 +128,7 @@ seek' o r = do (\diff -> startUnexport r ea db (Git.DiffTree.file diff) (unexportboth diff)) oldtreesha new updateExportTree db emptyTree new - liftIO $ recordDataSource db new + liftIO $ recordExportTreeCurrent db new -- Waiting until now to record the export guarantees that, -- if this export is interrupted, there are no files left over @@ -312,3 +311,28 @@ cleanupRename ea db ek src dest = do if exportDirectories src /= exportDirectories dest then removeEmptyDirectories ea db src [asKey ek] else return True + +-- | Remove empty directories from the export. Call after removing an +-- exported file, and after calling removeExportLocation and flushing the +-- database. +removeEmptyDirectories :: ExportActions Annex -> ExportHandle -> ExportLocation -> [Key] -> Annex Bool +removeEmptyDirectories ea db loc ks + | null (exportDirectories loc) = return True + | otherwise = case removeExportDirectory ea of + Nothing -> return True + Just removeexportdirectory -> do + ok <- allM (go removeexportdirectory) + (reverse (exportDirectories loc)) + unless ok $ liftIO $ do + -- Add location back to export database, + -- so this is tried again next time. + forM_ ks $ \k -> + addExportedLocation db k loc + flushDbQueue db + return ok + where + go removeexportdirectory d = + ifM (liftIO $ isExportDirectoryEmpty db d) + ( removeexportdirectory d + , return True + ) diff --git a/Database/Export.hs b/Database/Export.hs index ad106f84e6..322ab48fdc 100644 --- a/Database/Export.hs +++ b/Database/Export.hs @@ -15,21 +15,21 @@ module Database.Export ( openDb, closeDb, flushDbQueue, - recordDataSource, - getDataSource, addExportedLocation, removeExportedLocation, getExportedLocation, isExportDirectoryEmpty, + getExportTreeCurrent, + recordExportTreeCurrent, getExportTree, addExportTree, removeExportTree, updateExportTree, updateExportTree', ExportedId, - ExportTreeId, ExportedDirectoryId, - DataSourceId, + ExportTreeId, + ExportTreeCurrentId, ) where import Database.Types @@ -50,29 +50,33 @@ import Database.Esqueleto hiding (Key) newtype ExportHandle = ExportHandle H.DbQueue share [mkPersist sqlSettings, mkMigrate "migrateExport"] [persistLowerCase| --- Files that have been exported to the remote. +-- Files that have been exported to the remote and are present on it. Exported key IKey file SFilePath ExportedIndex key file --- The tree that has been exported to the remote. --- Not all of these files are necessarily present on the remote yet. -ExportTree - key IKey - file SFilePath - ExportTreeIndex key file -- Directories that exist on the remote, and the files that are in them. ExportedDirectory subdir SFilePath file SFilePath ExportedDirectoryIndex subdir file --- Record of what tree the current database content comes from. -DataSource +-- The content of the tree that has been exported to the remote. +-- Not all of these files are necessarily present on the remote yet. +ExportTree + key IKey + file SFilePath + ExportTreeIndex key file +-- The tree stored in ExportTree +ExportTreeCurrent tree SRef UniqueTree tree |] -{- Opens the database, creating it if it doesn't exist yet. -} +{- Opens the database, creating it if it doesn't exist yet. + - + - Only a single process should write to the export at a time, so guard + - any writes with the gitAnnexExportLock. + -} openDb :: UUID -> Annex ExportHandle openDb u = do dbdir <- fromRepo (gitAnnexExportDbDir u) @@ -97,19 +101,19 @@ queueDb (ExportHandle h) = H.queueDb h checkcommit flushDbQueue :: ExportHandle -> IO () flushDbQueue (ExportHandle h) = H.flushDbQueue h -recordDataSource :: ExportHandle -> Sha -> IO () -recordDataSource h s = queueDb h $ do +recordExportTreeCurrent :: ExportHandle -> Sha -> IO () +recordExportTreeCurrent h s = queueDb h $ do delete $ from $ \r -> do - where_ (r ^. DataSourceTree ==. r ^. DataSourceTree) - void $ insertUnique $ DataSource (toSRef s) + where_ (r ^. ExportTreeCurrentTree ==. r ^. ExportTreeCurrentTree) + void $ insertUnique $ ExportTreeCurrent $ toSRef s -getDataSource :: ExportHandle -> IO (Maybe Sha) -getDataSource (ExportHandle h) = H.queryDbQueue h $ do +getExportTreeCurrent :: ExportHandle -> IO (Maybe Sha) +getExportTreeCurrent (ExportHandle h) = H.queryDbQueue h $ do l <- select $ from $ \r -> do - where_ (r ^. DataSourceTree ==. r ^. DataSourceTree) - return (r ^. DataSourceTree) + where_ (r ^. ExportTreeCurrentTree ==. r ^. ExportTreeCurrentTree) + return (r ^. ExportTreeCurrentTree) case l of - (s:[]) -> return (Just (fromSRef (unValue s))) + (s:[]) -> return $ Just $ fromSRef $ unValue s _ -> return Nothing addExportedLocation :: ExportHandle -> Key -> ExportLocation -> IO () @@ -167,7 +171,7 @@ getExportTree (ExportHandle h) k = H.queryDbQueue h $ do addExportTree :: ExportHandle -> Key -> ExportLocation -> IO () addExportTree h k loc = queueDb h $ - void $ insertUnique $ Exported ik ef + void $ insertUnique $ ExportTree ik ef where ik = toIKey k ef = toSFilePath (fromExportLocation loc) diff --git a/Remote/Helper/Export.hs b/Remote/Helper/Export.hs index 9b31baca3b..d62c5a7e81 100644 --- a/Remote/Helper/Export.hs +++ b/Remote/Helper/Export.hs @@ -12,13 +12,16 @@ module Remote.Helper.Export where import Annex.Common import Types.Remote import Types.Backend -import Types.Export import Types.Key import Backend import Remote.Helper.Encryptable (isEncrypted) import Database.Export +import Logs.Export +import Annex.LockFile +import Git.Sha import qualified Data.Map as M +import Control.Concurrent.STM -- | Use for remotes that do not support exports. class HasExportUnsupported a where @@ -89,6 +92,33 @@ adjustExportable r = case M.lookup "exporttree" (config r) of } isexport = do db <- openDb (uuid r) + + updateflag <- liftIO newEmptyTMVarIO + let updateonce = liftIO $ atomically $ + ifM (isEmptyTMVar updateflag) + ( do + putTMVar updateflag () + return True + , return False + ) + + -- Get export locations for a key. Checks once + -- if the export log is different than the database and + -- updates the database, to notice when an export has been + -- updated from another repository. + let getexportlocs = \k -> do + whenM updateonce $ withExclusiveLock (gitAnnexExportLock (uuid r)) $ do + old <- liftIO $ fromMaybe emptyTree + <$> getExportTreeCurrent db + l <- getExport (uuid r) + case map exportedTreeish l of + (new:[]) | new /= old -> do + updateExportTree db old new + liftIO $ recordExportTreeCurrent db new + liftIO $ flushDbQueue db + _ -> return () + liftIO $ getExportTree db k + return $ r -- Storing a key on an export could be implemented, -- but it would perform unncessary work @@ -104,7 +134,7 @@ adjustExportable r = case M.lookup "exporttree" (config r) of , retrieveKeyFile = \k _af dest p -> unVerified $ if maybe False (isJust . verifyKeyContent) (maybeLookupBackendVariety (keyVariety k)) then do - locs <- liftIO $ getExportTree db k + locs <- getexportlocs k case locs of [] -> do warning "unknown export location" @@ -135,34 +165,9 @@ adjustExportable r = case M.lookup "exporttree" (config r) of , checkPresent = \k -> do ea <- exportActions r anyM (checkPresentExport ea k) - =<< liftIO (getExportTree db k) + =<< getexportlocs k , mkUnavailable = return Nothing , getInfo = do is <- getInfo r return (is++[("export", "yes")]) } - --- | Remove empty directories from the export. Call after removing an --- exported file, and after calling removeExportLocation and flushing the --- database. -removeEmptyDirectories :: ExportActions Annex -> ExportHandle -> ExportLocation -> [Key] -> Annex Bool -removeEmptyDirectories ea db loc ks - | null (exportDirectories loc) = return True - | otherwise = case removeExportDirectory ea of - Nothing -> return True - Just removeexportdirectory -> do - ok <- allM (go removeexportdirectory) - (reverse (exportDirectories loc)) - unless ok $ liftIO $ do - -- Add location back to export database, - -- so this is tried again next time. - forM_ ks $ \k -> - addExportedLocation db k loc - flushDbQueue db - return ok - where - go removeexportdirectory d = - ifM (liftIO $ isExportDirectoryEmpty db d) - ( removeexportdirectory d - , return True - ) diff --git a/doc/todo/export.mdwn b/doc/todo/export.mdwn index 3ddca0cf8d..6c6789a29b 100644 --- a/doc/todo/export.mdwn +++ b/doc/todo/export.mdwn @@ -17,38 +17,8 @@ there need to be a new interface in supported remotes? Work is in progress. Todo list: -* `git annex get --from export` works in the repo that exported to it, - but in another repo, the export db won't be populated, so it won't work. - Maybe just show a useful error message in this case? - - However, exporting from one repository and then trying to update the - export from another repository also doesn't work right, because the - export database is not populated. So, seems that the export database needs - to get populated based on the export log in these cases. - - This needs the db to contain a record of the data source, - the tree that most recently populated it. - - When the export log contains a different tree than the data source, - the export was updated in another repository, and so the - export db needs to be updated. - - Updating the export db could diff the data source with the - logged treeish. Add/delete exported files from the database to get - it to the same state as the remote database. - - When an export is incomplete, the database is in some - state in between the data source tree and the incompletely - exported tree. Diffing won't resolve this. - - When to record the data source? If it's done at the same time the export - is recorded (as no longer incomplete) in the export log, all the files - have not yet been uploaded to the export, and the the database is not - fully updated to match the data source. - - Seems that we need a separate table, to be able to look up filenames - from the export tree by key. That table can be fully populated, - before the Exported table is. +* bug: export db update does not reash disk after Remote.Helper.Export calls + updateExportTree. * tracking exports