track exported files in a sqlite database

Went with a separate db per export remote, rather than a single export
database. Mostly because there will probably not be a lot of separate
export remotes, and it might be convenient to be able to delete a given
remote's export database.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-09-04 13:52:22 -04:00
parent 28e2cad849
commit 7eb9889bfd
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 107 additions and 6 deletions

View file

@ -36,6 +36,7 @@ module Annex.Locations (
gitAnnexFsckDbDir, gitAnnexFsckDbDir,
gitAnnexFsckDbLock, gitAnnexFsckDbLock,
gitAnnexFsckResultsLog, gitAnnexFsckResultsLog,
gitAnnexExportDbDir,
gitAnnexScheduleState, gitAnnexScheduleState,
gitAnnexTransferDir, gitAnnexTransferDir,
gitAnnexCredsDir, gitAnnexCredsDir,
@ -290,6 +291,15 @@ gitAnnexFsckDbLock u r = gitAnnexFsckDir u r </> "fsck.lck"
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath
gitAnnexFsckResultsLog u r = gitAnnexDir r </> "fsckresults" </> fromUUID u gitAnnexFsckResultsLog u r = gitAnnexDir r </> "fsckresults" </> fromUUID u
{- .git/annex/export/uuid/ is used to store information about
- exports to special remotes. -}
gitAnnexExportDir :: UUID -> Git.Repo -> FilePath
gitAnnexExportDir u r = gitAnnexDir r </> "export" </> fromUUID u
{- Directory containing database used to record export info. -}
gitAnnexExportDbDir :: UUID -> Git.Repo -> FilePath
gitAnnexExportDbDir u r = gitAnnexExportDir u r </> "db"
{- .git/annex/schedulestate is used to store information about when {- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -} - scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> FilePath gitAnnexScheduleState :: Git.Repo -> FilePath

View file

@ -21,6 +21,7 @@ import Annex.Content
import Annex.CatFile import Annex.CatFile
import Logs.Location import Logs.Location
import Logs.Export import Logs.Export
import Database.Export
import Messages.Progress import Messages.Progress
import Utility.Tmp import Utility.Tmp
@ -81,6 +82,8 @@ seek o = do
when (length old > 1) $ when (length old > 1) $
warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.." warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
db <- openDb (uuid r)
-- First, diff the old and new trees and delete all changed -- First, diff the old and new trees and delete all changed
-- files in the export. Every file that remains in the export will -- files in the export. Every file that remains in the export will
-- have the content from the new treeish. -- have the content from the new treeish.
@ -89,7 +92,7 @@ seek o = do
forM_ old $ \oldtreesha -> do forM_ old $ \oldtreesha -> do
(diff, cleanup) <- inRepo $ (diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive oldtreesha new Git.DiffTree.diffTreeRecursive oldtreesha new
seekActions $ pure $ map (startUnexport r) diff seekActions $ pure $ map (startUnexport r db) diff
void $ liftIO cleanup void $ liftIO cleanup
-- Waiting until now to record the export guarantees that, -- Waiting until now to record the export guarantees that,
@ -102,12 +105,13 @@ seek o = do
-- Export everything that is not yet exported. -- Export everything that is not yet exported.
(l, cleanup') <- inRepo $ Git.LsTree.lsTree new (l, cleanup') <- inRepo $ Git.LsTree.lsTree new
seekActions $ pure $ map (startExport r) l seekActions $ pure $ map (startExport r db) l
void $ liftIO cleanup' void $ liftIO cleanup'
startExport :: Remote -> Git.LsTree.TreeItem -> CommandStart startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart
startExport r ti = do startExport r db ti = do
ek <- exportKey (Git.LsTree.sha ti) ek <- exportKey (Git.LsTree.sha ti)
liftIO $ addExportLocation db (asKey ek) loc
stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do
showStart "export" f showStart "export" f
next $ performExport r ek (Git.LsTree.sha ti) loc next $ performExport r ek (Git.LsTree.sha ti) loc
@ -144,11 +148,12 @@ cleanupExport r ek = do
logChange (asKey ek) (uuid r) InfoPresent logChange (asKey ek) (uuid r) InfoPresent
return True return True
startUnexport :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart startUnexport :: Remote -> ExportHandle -> Git.DiffTree.DiffTreeItem -> CommandStart
startUnexport r diff startUnexport r db diff
| Git.DiffTree.srcsha diff /= nullSha = do | Git.DiffTree.srcsha diff /= nullSha = do
showStart "unexport" f showStart "unexport" f
oldk <- exportKey (Git.DiffTree.srcsha diff) oldk <- exportKey (Git.DiffTree.srcsha diff)
liftIO $ removeExportLocation db (asKey oldk) loc
next $ performUnexport r oldk loc next $ performUnexport r oldk loc
| otherwise = stop | otherwise = stop
where where

85
Database/Export.hs Normal file
View file

@ -0,0 +1,85 @@
{- Sqlite database used for exports to special remotes.
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-:
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
module Database.Export (
ExportHandle,
openDb,
closeDb,
addExportLocation,
removeExportLocation,
getExportLocation,
ExportedId,
) where
import Database.Types
import qualified Database.Queue as H
import Database.Init
import Annex.Locations
import Annex.Common hiding (delete)
import Types.Remote (ExportLocation(..))
import Database.Persist.TH
import Database.Esqueleto hiding (Key)
data ExportHandle = ExportHandle H.DbQueue
share [mkPersist sqlSettings, mkMigrate "migrateExport"] [persistLowerCase|
Exported
key IKey
file SFilePath
KeyFileIndex key file
UniqueKey key
|]
{- Opens the database, creating it if it doesn't exist yet. -}
openDb :: UUID -> Annex ExportHandle
openDb u = do
dbdir <- fromRepo (gitAnnexExportDbDir u)
let db = dbdir </> "db"
unlessM (liftIO $ doesFileExist db) $ do
initDb db $ void $
runMigrationSilent migrateExport
h <- liftIO $ H.openDbQueue db "exported"
return $ ExportHandle h
closeDb :: ExportHandle -> Annex ()
closeDb (ExportHandle h) = liftIO $ H.closeDbQueue h
queueDb :: ExportHandle -> SqlPersistM () -> IO ()
queueDb (ExportHandle h) = H.queueDb h checkcommit
where
-- commit queue after 1000 changes
checkcommit sz _lastcommittime
| sz > 1000 = return True
| otherwise = return False
addExportLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
addExportLocation h k (ExportLocation f) = queueDb h $
void $ insertUnique $ Exported (toIKey k) (toSFilePath f)
removeExportLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
removeExportLocation h k (ExportLocation f) = queueDb h $
delete $ from $ \r -> do
where_ (r ^. ExportedKey ==. val ik &&. r ^. ExportedFile ==. val ef)
where
ik = toIKey k
ef = toSFilePath f
{- Doesn't know about recently queued changes. -}
getExportLocation :: ExportHandle -> Key -> IO [ExportLocation]
getExportLocation (ExportHandle h) k = H.queryDbQueue h $ do
l <- select $ from $ \r -> do
where_ (r ^. ExportedKey ==. val ik)
return (r ^. ExportedFile)
return $ map (ExportLocation . fromSFilePath . unValue) l
where
ik = toIKey k

View file

@ -787,6 +787,7 @@ Executable git-annex
Config.GitConfig Config.GitConfig
Creds Creds
Crypto Crypto
Database.Export
Database.Fsck Database.Fsck
Database.Handle Database.Handle
Database.Init Database.Init