track exported files in a sqlite database

Went with a separate db per export remote, rather than a single export
database. Mostly because there will probably not be a lot of separate
export remotes, and it might be convenient to be able to delete a given
remote's export database.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-09-04 13:52:22 -04:00
parent 28e2cad849
commit 7eb9889bfd
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 107 additions and 6 deletions

View file

@ -36,6 +36,7 @@ module Annex.Locations (
gitAnnexFsckDbDir,
gitAnnexFsckDbLock,
gitAnnexFsckResultsLog,
gitAnnexExportDbDir,
gitAnnexScheduleState,
gitAnnexTransferDir,
gitAnnexCredsDir,
@ -290,6 +291,15 @@ gitAnnexFsckDbLock u r = gitAnnexFsckDir u r </> "fsck.lck"
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath
gitAnnexFsckResultsLog u r = gitAnnexDir r </> "fsckresults" </> fromUUID u
{- .git/annex/export/uuid/ is used to store information about
- exports to special remotes. -}
gitAnnexExportDir :: UUID -> Git.Repo -> FilePath
gitAnnexExportDir u r = gitAnnexDir r </> "export" </> fromUUID u
{- Directory containing database used to record export info. -}
gitAnnexExportDbDir :: UUID -> Git.Repo -> FilePath
gitAnnexExportDbDir u r = gitAnnexExportDir u r </> "db"
{- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> FilePath

View file

@ -21,6 +21,7 @@ import Annex.Content
import Annex.CatFile
import Logs.Location
import Logs.Export
import Database.Export
import Messages.Progress
import Utility.Tmp
@ -81,6 +82,8 @@ seek o = do
when (length old > 1) $
warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
db <- openDb (uuid r)
-- First, diff the old and new trees and delete all changed
-- files in the export. Every file that remains in the export will
-- have the content from the new treeish.
@ -89,7 +92,7 @@ seek o = do
forM_ old $ \oldtreesha -> do
(diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive oldtreesha new
seekActions $ pure $ map (startUnexport r) diff
seekActions $ pure $ map (startUnexport r db) diff
void $ liftIO cleanup
-- Waiting until now to record the export guarantees that,
@ -102,12 +105,13 @@ seek o = do
-- Export everything that is not yet exported.
(l, cleanup') <- inRepo $ Git.LsTree.lsTree new
seekActions $ pure $ map (startExport r) l
seekActions $ pure $ map (startExport r db) l
void $ liftIO cleanup'
startExport :: Remote -> Git.LsTree.TreeItem -> CommandStart
startExport r ti = do
startExport :: Remote -> ExportHandle -> Git.LsTree.TreeItem -> CommandStart
startExport r db ti = do
ek <- exportKey (Git.LsTree.sha ti)
liftIO $ addExportLocation db (asKey ek) loc
stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do
showStart "export" f
next $ performExport r ek (Git.LsTree.sha ti) loc
@ -144,11 +148,12 @@ cleanupExport r ek = do
logChange (asKey ek) (uuid r) InfoPresent
return True
startUnexport :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart
startUnexport r diff
startUnexport :: Remote -> ExportHandle -> Git.DiffTree.DiffTreeItem -> CommandStart
startUnexport r db diff
| Git.DiffTree.srcsha diff /= nullSha = do
showStart "unexport" f
oldk <- exportKey (Git.DiffTree.srcsha diff)
liftIO $ removeExportLocation db (asKey oldk) loc
next $ performUnexport r oldk loc
| otherwise = stop
where

85
Database/Export.hs Normal file
View file

@ -0,0 +1,85 @@
{- Sqlite database used for exports to special remotes.
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-:
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
module Database.Export (
ExportHandle,
openDb,
closeDb,
addExportLocation,
removeExportLocation,
getExportLocation,
ExportedId,
) where
import Database.Types
import qualified Database.Queue as H
import Database.Init
import Annex.Locations
import Annex.Common hiding (delete)
import Types.Remote (ExportLocation(..))
import Database.Persist.TH
import Database.Esqueleto hiding (Key)
data ExportHandle = ExportHandle H.DbQueue
share [mkPersist sqlSettings, mkMigrate "migrateExport"] [persistLowerCase|
Exported
key IKey
file SFilePath
KeyFileIndex key file
UniqueKey key
|]
{- Opens the database, creating it if it doesn't exist yet. -}
openDb :: UUID -> Annex ExportHandle
openDb u = do
dbdir <- fromRepo (gitAnnexExportDbDir u)
let db = dbdir </> "db"
unlessM (liftIO $ doesFileExist db) $ do
initDb db $ void $
runMigrationSilent migrateExport
h <- liftIO $ H.openDbQueue db "exported"
return $ ExportHandle h
closeDb :: ExportHandle -> Annex ()
closeDb (ExportHandle h) = liftIO $ H.closeDbQueue h
queueDb :: ExportHandle -> SqlPersistM () -> IO ()
queueDb (ExportHandle h) = H.queueDb h checkcommit
where
-- commit queue after 1000 changes
checkcommit sz _lastcommittime
| sz > 1000 = return True
| otherwise = return False
addExportLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
addExportLocation h k (ExportLocation f) = queueDb h $
void $ insertUnique $ Exported (toIKey k) (toSFilePath f)
removeExportLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
removeExportLocation h k (ExportLocation f) = queueDb h $
delete $ from $ \r -> do
where_ (r ^. ExportedKey ==. val ik &&. r ^. ExportedFile ==. val ef)
where
ik = toIKey k
ef = toSFilePath f
{- Doesn't know about recently queued changes. -}
getExportLocation :: ExportHandle -> Key -> IO [ExportLocation]
getExportLocation (ExportHandle h) k = H.queryDbQueue h $ do
l <- select $ from $ \r -> do
where_ (r ^. ExportedKey ==. val ik)
return (r ^. ExportedFile)
return $ map (ExportLocation . fromSFilePath . unValue) l
where
ik = toIKey k

View file

@ -787,6 +787,7 @@ Executable git-annex
Config.GitConfig
Creds
Crypto
Database.Export
Database.Fsck
Database.Handle
Database.Init