From a818bc5e73db6b5f9fcc5f938b6898e7fb05d53e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 20 Feb 2019 16:59:10 -0400 Subject: [PATCH] add Database.ContentIdentifier Does not yet have a way to update with new information from the git-annex branch, which will be needed when multiple repos are importing from the same remote. --- Annex/Locations.hs | 10 ++++ COPYRIGHT | 2 +- Database/ContentIdentifier.hs | 89 ++++++++++++++++++++++++++++++++++ Database/Types.hs | 32 +++++++++++- Logs/ContentIdentifier/Pure.hs | 1 - git-annex.cabal | 1 + 6 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 Database/ContentIdentifier.hs diff --git a/Annex/Locations.hs b/Annex/Locations.hs index ad75271869..99d831fe1a 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -48,6 +48,8 @@ module Annex.Locations ( gitAnnexSmudgeLock, gitAnnexExportDbDir, gitAnnexExportLock, + gitAnnexContentIdentifierDbDir, + gitAnnexContentIdentifierLock, gitAnnexScheduleState, gitAnnexTransferDir, gitAnnexCredsDir, @@ -348,6 +350,14 @@ gitAnnexExportDbDir u r = gitAnnexExportDir u r "db" gitAnnexExportLock :: UUID -> Git.Repo -> FilePath gitAnnexExportLock u r = gitAnnexExportDbDir u r ++ ".lck" +{- Directory containing database used to record remote content ids. -} +gitAnnexContentIdentifierDbDir :: Git.Repo -> FilePath +gitAnnexContentIdentifierDbDir r = gitAnnexDir r "cid" + +{- Lock file for writing to the content id database. -} +gitAnnexContentIdentifierLock :: Git.Repo -> FilePath +gitAnnexContentIdentifierLock r = gitAnnexContentIdentifierDbDir r ++ ".lck" + {- .git/annex/schedulestate is used to store information about when - scheduled jobs were last run. -} gitAnnexScheduleState :: Git.Repo -> FilePath diff --git a/COPYRIGHT b/COPYRIGHT index 40c7e02d79..2691795e4a 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -10,7 +10,7 @@ Copyright: © 2012-2017 Joey Hess © 2014 Sören Brunk License: AGPL-3+ -Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs +Files: Annex/AdjustedBranch.hs Annex/AdjustedBranch/Name.hs Annex/CurrentBranch.hs Annex/Version.hs Benchmark.hs Database/ContentIdentifier.hs Logs/File.hs Logs/Line.hs Logs/Smudge.hs Logs/ContentIdentifier/Pure.hs Remote/Git.hs Remote/Helper/Ssh.hs Remote/Adb.hs Remote/External.hs Remote/Extermal/Types.hs Types/AdjustedBranch.hs Types/RepoVersion.hs Upgrade/V6.hs Copyright: © 2011-2019 Joey Hess License: AGPL-3+ diff --git a/Database/ContentIdentifier.hs b/Database/ContentIdentifier.hs new file mode 100644 index 0000000000..660e645109 --- /dev/null +++ b/Database/ContentIdentifier.hs @@ -0,0 +1,89 @@ +{- Sqlite database of ContentIdentifiers imported from special remotes. + - + - This contains a mapping from ContentIdentifier to Key. + - The reverse mapping from Key to ContentIdentifier is stored in the + - git-annex branch, see Logs.ContentIdentifier. + - + - Copyright 2019 Joey Hess + -: + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-} +{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-} +{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-} +{-# LANGUAGE RankNTypes #-} + +module Database.ContentIdentifier ( + ContentIdentifierHandle, + openDb, + closeDb, + flushDbQueue, + recordContentIdentifier, + getContentIdentifierKeys, + ContentIdentifiersId, +) where + +import Database.Types +import qualified Database.Queue as H +import Database.Init +import Annex.Locations +import Annex.Common hiding (delete) +import Types.Remote (ContentIdentifier(..)) + +import Database.Persist.Sql hiding (Key) +import Database.Persist.TH + +data ContentIdentifierHandle = ContentIdentifierHandle H.DbQueue + +share [mkPersist sqlSettings, mkMigrate "migrateContentIdentifier"] [persistLowerCase| +ContentIdentifiers + remote UUID + cid ContentIdentifier + key SKey + ContentIdentifiersIndex remote cid + UniqueRemoteCidKey remote cid key +|] + +{- Opens the database, creating it if it doesn't exist yet. + - + - Only a single process should write to the database at a time, so guard + - any writes with the gitAnnexContentIdentifierLock. + -} +openDb :: Annex ContentIdentifierHandle +openDb = do + dbdir <- fromRepo gitAnnexContentIdentifierDbDir + let db = dbdir "db" + unlessM (liftIO $ doesFileExist db) $ do + initDb db $ void $ + runMigrationSilent migrateContentIdentifier + h <- liftIO $ H.openDbQueue H.SingleWriter db "contentidentifiers" + return $ ContentIdentifierHandle h + +closeDb :: ContentIdentifierHandle -> Annex () +closeDb (ContentIdentifierHandle h) = liftIO $ H.closeDbQueue h + +queueDb :: ContentIdentifierHandle -> SqlPersistM () -> IO () +queueDb (ContentIdentifierHandle h) = H.queueDb h checkcommit + where + -- commit queue after 1000 changes + checkcommit sz _lastcommittime + | sz > 1000 = return True + | otherwise = return False + +flushDbQueue :: ContentIdentifierHandle -> IO () +flushDbQueue (ContentIdentifierHandle h) = H.flushDbQueue h + +-- Be sure to also update the git-annex branch when using this. +recordContentIdentifier :: ContentIdentifierHandle -> UUID -> ContentIdentifier -> Key -> IO () +recordContentIdentifier h u cid k = queueDb h $ do + void $ insertUnique $ ContentIdentifiers u cid (toSKey k) + +getContentIdentifierKeys :: ContentIdentifierHandle -> UUID -> ContentIdentifier -> IO [Key] +getContentIdentifierKeys (ContentIdentifierHandle h) u cid = + H.queryDbQueue h $ do + l <- selectList + [ ContentIdentifiersCid ==. cid + , ContentIdentifiersRemote ==. u + ] [] + return $ map (fromSKey . contentIdentifiersKey . entityVal) l diff --git a/Database/Types.hs b/Database/Types.hs index d330b3f760..5145450345 100644 --- a/Database/Types.hs +++ b/Database/Types.hs @@ -1,22 +1,29 @@ {- types for SQL databases - - - Copyright 2015-2017 Joey Hess + - Copyright 2015-2019 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} {-# LANGUAGE TemplateHaskell #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} module Database.Types where import Database.Persist.TH +import Database.Persist.Class hiding (Key) +import Database.Persist.Sql hiding (Key) import Data.Maybe import Data.Char +import qualified Data.ByteString as S +import qualified Data.Text as T import Utility.PartialPrelude import Key import Utility.InodeCache import Git.Types (Ref(..)) +import Types.UUID +import Types.Remote (ContentIdentifier(..)) -- A serialized Key newtype SKey = SKey String @@ -112,3 +119,26 @@ toSRef = SRef fromSRef :: SRef -> Ref fromSRef (SRef r) = r + +instance PersistField UUID where + toPersistValue u = toPersistValue b + where + b :: S.ByteString + b = fromUUID u + fromPersistValue v = toUUID <$> go + where + go :: Either T.Text S.ByteString + go = fromPersistValue v + +instance PersistFieldSql UUID where + sqlType _ = SqlBlob + +instance PersistField ContentIdentifier where + toPersistValue (ContentIdentifier b) = toPersistValue b + fromPersistValue v = ContentIdentifier <$> go + where + go :: Either T.Text S.ByteString + go = fromPersistValue v + +instance PersistFieldSql ContentIdentifier where + sqlType _ = SqlBlob diff --git a/Logs/ContentIdentifier/Pure.hs b/Logs/ContentIdentifier/Pure.hs index 536b60fa47..8e8e7e04c4 100644 --- a/Logs/ContentIdentifier/Pure.hs +++ b/Logs/ContentIdentifier/Pure.hs @@ -15,7 +15,6 @@ module Logs.ContentIdentifier.Pure import Annex.Common import Logs.MapLog -import Data.Int import Types.Remote (ContentIdentifier(..)) import Utility.Base64 diff --git a/git-annex.cabal b/git-annex.cabal index e5639007d6..89c1f1db44 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -808,6 +808,7 @@ Executable git-annex Config.Smudge Creds Crypto + Database.ContentIdentifier Database.Export Database.Fsck Database.Handle