add Database.ContentIdentifier

Does not yet have a way to update with new information from the
git-annex branch, which will be needed when multiple repos are importing
from the same remote.
This commit is contained in:
Joey Hess 2019-02-20 16:59:10 -04:00
parent ccc0684d21
commit a818bc5e73
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 132 additions and 3 deletions

View file

@ -48,6 +48,8 @@ module Annex.Locations (
gitAnnexSmudgeLock,
gitAnnexExportDbDir,
gitAnnexExportLock,
gitAnnexContentIdentifierDbDir,
gitAnnexContentIdentifierLock,
gitAnnexScheduleState,
gitAnnexTransferDir,
gitAnnexCredsDir,
@ -348,6 +350,14 @@ gitAnnexExportDbDir u r = gitAnnexExportDir u r </> "db"
gitAnnexExportLock :: UUID -> Git.Repo -> FilePath
gitAnnexExportLock u r = gitAnnexExportDbDir u r ++ ".lck"
{- Directory containing database used to record remote content ids. -}
gitAnnexContentIdentifierDbDir :: Git.Repo -> FilePath
gitAnnexContentIdentifierDbDir r = gitAnnexDir r </> "cid"
{- Lock file for writing to the content id database. -}
gitAnnexContentIdentifierLock :: Git.Repo -> FilePath
gitAnnexContentIdentifierLock r = gitAnnexContentIdentifierDbDir r ++ ".lck"
{- .git/annex/schedulestate is used to store information about when
- scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> FilePath

View file

@ -10,7 +10,7 @@ Copyright: © 2012-2017 Joey Hess <id@joeyh.name>
© 2014 Sören Brunk
License: AGPL-3+
Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs
Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Database/ContentIdentifier.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs
Copyright: © 2011-2019 Joey Hess <id@joeyh.name>
License: AGPL-3+

View file

@ -0,0 +1,89 @@
{- Sqlite database of ContentIdentifiers imported from special remotes.
-
- This contains a mapping from ContentIdentifier to Key.
- The reverse mapping from Key to ContentIdentifier is stored in the
- git-annex branch, see Logs.ContentIdentifier.
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-:
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
module Database.ContentIdentifier (
ContentIdentifierHandle,
openDb,
closeDb,
flushDbQueue,
recordContentIdentifier,
getContentIdentifierKeys,
ContentIdentifiersId,
) where
import Database.Types
import qualified Database.Queue as H
import Database.Init
import Annex.Locations
import Annex.Common hiding (delete)
import Types.Remote (ContentIdentifier(..))
import Database.Persist.Sql hiding (Key)
import Database.Persist.TH
data ContentIdentifierHandle = ContentIdentifierHandle H.DbQueue
share [mkPersist sqlSettings, mkMigrate "migrateContentIdentifier"] [persistLowerCase|
ContentIdentifiers
remote UUID
cid ContentIdentifier
key SKey
ContentIdentifiersIndex remote cid
UniqueRemoteCidKey remote cid key
|]
{- Opens the database, creating it if it doesn't exist yet.
-
- Only a single process should write to the database at a time, so guard
- any writes with the gitAnnexContentIdentifierLock.
-}
openDb :: Annex ContentIdentifierHandle
openDb = do
dbdir <- fromRepo gitAnnexContentIdentifierDbDir
let db = dbdir </> "db"
unlessM (liftIO $ doesFileExist db) $ do
initDb db $ void $
runMigrationSilent migrateContentIdentifier
h <- liftIO $ H.openDbQueue H.SingleWriter db "contentidentifiers"
return $ ContentIdentifierHandle h
closeDb :: ContentIdentifierHandle -> Annex ()
closeDb (ContentIdentifierHandle h) = liftIO $ H.closeDbQueue h
queueDb :: ContentIdentifierHandle -> SqlPersistM () -> IO ()
queueDb (ContentIdentifierHandle h) = H.queueDb h checkcommit
where
-- commit queue after 1000 changes
checkcommit sz _lastcommittime
| sz > 1000 = return True
| otherwise = return False
flushDbQueue :: ContentIdentifierHandle -> IO ()
flushDbQueue (ContentIdentifierHandle h) = H.flushDbQueue h
-- Be sure to also update the git-annex branch when using this.
recordContentIdentifier :: ContentIdentifierHandle -> UUID -> ContentIdentifier -> Key -> IO ()
recordContentIdentifier h u cid k = queueDb h $ do
void $ insertUnique $ ContentIdentifiers u cid (toSKey k)
getContentIdentifierKeys :: ContentIdentifierHandle -> UUID -> ContentIdentifier -> IO [Key]
getContentIdentifierKeys (ContentIdentifierHandle h) u cid =
H.queryDbQueue h $ do
l <- selectList
[ ContentIdentifiersCid ==. cid
, ContentIdentifiersRemote ==. u
] []
return $ map (fromSKey . contentIdentifiersKey . entityVal) l

View file

@ -1,22 +1,29 @@
{- types for SQL databases
-
- Copyright 2015-2017 Joey Hess <id@joeyh.name>
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Database.Types where
import Database.Persist.TH
import Database.Persist.Class hiding (Key)
import Database.Persist.Sql hiding (Key)
import Data.Maybe
import Data.Char
import qualified Data.ByteString as S
import qualified Data.Text as T
import Utility.PartialPrelude
import Key
import Utility.InodeCache
import Git.Types (Ref(..))
import Types.UUID
import Types.Remote (ContentIdentifier(..))
-- A serialized Key
newtype SKey = SKey String
@ -112,3 +119,26 @@ toSRef = SRef
fromSRef :: SRef -> Ref
fromSRef (SRef r) = r
instance PersistField UUID where
toPersistValue u = toPersistValue b
where
b :: S.ByteString
b = fromUUID u
fromPersistValue v = toUUID <$> go
where
go :: Either T.Text S.ByteString
go = fromPersistValue v
instance PersistFieldSql UUID where
sqlType _ = SqlBlob
instance PersistField ContentIdentifier where
toPersistValue (ContentIdentifier b) = toPersistValue b
fromPersistValue v = ContentIdentifier <$> go
where
go :: Either T.Text S.ByteString
go = fromPersistValue v
instance PersistFieldSql ContentIdentifier where
sqlType _ = SqlBlob

View file

@ -15,7 +15,6 @@ module Logs.ContentIdentifier.Pure
import Annex.Common
import Logs.MapLog
import Data.Int
import Types.Remote (ContentIdentifier(..))
import Utility.Base64

View file

@ -808,6 +808,7 @@ Executable git-annex
Config.Smudge
Creds
Crypto
Database.ContentIdentifier
Database.Export
Database.Fsck
Database.Handle