git-annex/Database/ContentIdentifier.hs

185 lines
6.4 KiB
Haskell
Raw Normal View History

{- Sqlite database of ContentIdentifiers imported from special remotes.
-
- Copyright 2019-2023 Joey Hess <id@joeyh.name>
-:
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TypeOperators, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts, EmptyDataDecls #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE DataKinds, FlexibleInstances #-}
{-# LANGUAGE UndecidableInstances #-}
#if MIN_VERSION_persistent_template(2,8,0)
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE StandaloneDeriving #-}
#endif
module Database.ContentIdentifier (
ContentIdentifierHandle,
databaseIsEmpty,
openDb,
closeDb,
flushDbQueue,
recordContentIdentifier,
getContentIdentifiers,
getContentIdentifierKeys,
recordAnnexBranchTree,
getAnnexBranchTree,
2019-03-07 16:56:40 +00:00
needsUpdateFromLog,
updateFromLog,
ContentIdentifiersId,
AnnexBranchId,
) where
import Database.Types
import qualified Database.Queue as H
import Database.Init
import Database.Utility
import Annex.Locations
import Annex.Common hiding (delete)
2019-03-07 16:56:40 +00:00
import qualified Annex.Branch
import Types.Import
import Types.RemoteState
import Git.Types
import Git.Sha
2019-03-07 16:56:40 +00:00
import Git.FilePath
import qualified Git.DiffTree as DiffTree
import Logs
import qualified Logs.ContentIdentifier as Log
import qualified Utility.RawFilePath as R
import Database.Persist.Sql hiding (Key)
import Database.Persist.TH
import Database.Persist.Sqlite (runSqlite)
import qualified System.FilePath.ByteString as P
import qualified Data.Text as T
data ContentIdentifierHandle = ContentIdentifierHandle H.DbQueue Bool
databaseIsEmpty :: ContentIdentifierHandle -> Bool
databaseIsEmpty (ContentIdentifierHandle _ b) = b
-- Note on indexes: ContentIndentifiersKeyRemoteCidIndex etc are really
-- uniqueness constraints, which cause sqlite to automatically add indexes.
-- So when adding indexes, have to take care to only add ones that work as
-- uniqueness constraints. (Unfortunately persistent does not support indexes
-- that are not uniqueness constraints;
-- https://github.com/yesodweb/persistent/issues/109)
--
-- ContentIndentifiersKeyRemoteCidIndex speeds up queries like
-- getContentIdentifiers, but it is not used for
-- getContentIdentifierKeys. ContentIndentifiersCidRemoteKeyIndex was
-- addedto speed that up.
share [mkPersist sqlSettings, mkMigrate "migrateContentIdentifier"] [persistLowerCase|
ContentIdentifiers
remote UUID
cid ContentIdentifier
key Key
ContentIndentifiersKeyRemoteCidIndex key remote cid
ContentIndentifiersCidRemoteKeyIndex cid remote key
-- The last git-annex branch tree sha that was used to update
-- ContentIdentifiers
AnnexBranch
tree SSha
UniqueTree tree
|]
{- Opens the database, creating it if it doesn't exist yet.
-
- Only a single process should write to the database at a time, so guard
- any writes with the gitAnnexContentIdentifierLock.
-}
openDb :: Annex ContentIdentifierHandle
openDb = do
dbdir <- calcRepo' gitAnnexContentIdentifierDbDir
let db = dbdir P.</> "db"
isnew <- liftIO $ not <$> R.doesPathExist db
if isnew
then initDb db $ void $
runMigrationSilent migrateContentIdentifier
-- Migrate from old versions of database, which had buggy
-- and suboptimal uniqueness constraints.
else liftIO $ runSqlite (T.pack (fromRawFilePath db)) $ void $
runMigrationSilent migrateContentIdentifier
h <- liftIO $ H.openDbQueue db "content_identifiers"
return $ ContentIdentifierHandle h isnew
closeDb :: ContentIdentifierHandle -> Annex ()
closeDb (ContentIdentifierHandle h _) = liftIO $ H.closeDbQueue h
queueDb :: ContentIdentifierHandle -> SqlPersistM () -> IO ()
queueDb (ContentIdentifierHandle h _) = H.queueDb h checkcommit
where
-- commit queue after 1000 changes
checkcommit sz _lastcommittime
| sz > 1000 = return True
| otherwise = return False
flushDbQueue :: ContentIdentifierHandle -> IO ()
flushDbQueue (ContentIdentifierHandle h _) = H.flushDbQueue h
-- Be sure to also update the git-annex branch when using this.
recordContentIdentifier :: ContentIdentifierHandle -> RemoteStateHandle -> ContentIdentifier -> Key -> IO ()
recordContentIdentifier h (RemoteStateHandle u) cid k = queueDb h $ do
void $ insertUniqueFast $ ContentIdentifiers u cid k
getContentIdentifiers :: ContentIdentifierHandle -> RemoteStateHandle -> Key -> IO [ContentIdentifier]
getContentIdentifiers (ContentIdentifierHandle h _) (RemoteStateHandle u) k =
H.queryDbQueue h $ do
l <- selectList
[ ContentIdentifiersKey ==. k
, ContentIdentifiersRemote ==. u
] []
return $ map (contentIdentifiersCid . entityVal) l
getContentIdentifierKeys :: ContentIdentifierHandle -> RemoteStateHandle -> ContentIdentifier -> IO [Key]
getContentIdentifierKeys (ContentIdentifierHandle h _) (RemoteStateHandle u) cid =
H.queryDbQueue h $ do
l <- selectList
[ ContentIdentifiersCid ==. cid
, ContentIdentifiersRemote ==. u
] []
return $ map (contentIdentifiersKey . entityVal) l
recordAnnexBranchTree :: ContentIdentifierHandle -> Sha -> IO ()
recordAnnexBranchTree h s = queueDb h $ do
2023-05-30 19:42:11 +00:00
deleteWhere ([] :: [Filter AnnexBranch])
void $ insertUniqueFast $ AnnexBranch $ toSSha s
getAnnexBranchTree :: ContentIdentifierHandle -> IO Sha
getAnnexBranchTree (ContentIdentifierHandle h _) = H.queryDbQueue h $ do
2023-05-30 19:42:11 +00:00
l <- selectList ([] :: [Filter AnnexBranch]) []
case l of
(s:[]) -> return $ fromSSha $ annexBranchTree $ entityVal s
_ -> return emptyTree
2019-03-07 16:56:40 +00:00
{- Check if the git-annex branch has been updated and the database needs
- to be updated with any new content identifiers in it. -}
needsUpdateFromLog :: ContentIdentifierHandle -> Annex (Maybe (Sha, Sha))
needsUpdateFromLog db = do
oldtree <- liftIO $ getAnnexBranchTree db
Annex.Branch.updatedFromTree oldtree
2019-03-07 16:56:40 +00:00
{- The database should be locked for write when calling this. -}
updateFromLog :: ContentIdentifierHandle -> (Sha, Sha) -> Annex ContentIdentifierHandle
updateFromLog db@(ContentIdentifierHandle h _) (oldtree, currtree) = do
2019-03-07 16:56:40 +00:00
(l, cleanup) <- inRepo $
DiffTree.diffTreeRecursive oldtree currtree
mapM_ go l
void $ liftIO $ cleanup
liftIO $ do
recordAnnexBranchTree db currtree
flushDbQueue db
return (ContentIdentifierHandle h False)
2019-03-07 16:56:40 +00:00
where
go ti = case extLogFileKey remoteContentIdentifierExt (getTopFilePath (DiffTree.file ti)) of
2019-03-07 16:56:40 +00:00
Nothing -> return ()
Just k -> do
l <- Log.getContentIdentifiers k
liftIO $ forM_ l $ \(rs, cids) ->
2019-03-07 16:56:40 +00:00
forM_ cids $ \cid ->
recordContentIdentifier db rs cid k