git-annex/Database/Keys/SQL.hs
Joey Hess c35a9047d3
improve data types for sqlite
This is a non-backwards compatable change, so not suitable for merging
w/o a annex.version bump and transition code. Not yet tested.

This improves performance of git-annex benchmark --databases
across the board by 10-25%, since eg Key roundtrips as a ByteString.

(serializeKey' produces a lazy ByteString, so there is still a
copy involved in converting it to a strict ByteString. It may be faster
to switch to using bytestring-strict-builder.)

FilePath and Key are both stored as blobs. This avoids mojibake in some
situations. It would be possible to use varchar instead, if persistent
could avoid converting that to Text, but it seems there is no good
way to do so. See doc/todo/sqlite_database_improvements.mdwn

Eliminated some ugly artifacts of using Read/Show serialization;
constructors and quoted strings are no longer stored in sqlite.

Renamed SRef to SSha to reflect that it is only ever a git sha,
not a ref name. Since it is limited to the characters in a sha,
it is not affected by mojibake, so still uses String.
2019-10-29 17:05:36 -04:00

147 lines
4.5 KiB
Haskell

{- Sqlite database of information about Keys
-
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes, ScopedTypeVariables #-}
{-# LANGUAGE UndecidableInstances #-}
module Database.Keys.SQL where
import Database.Types
import Database.Handle
import qualified Database.Queue as H
import Utility.InodeCache
import Git.FilePath
import Database.Persist.Sql hiding (Key)
import Database.Persist.TH
import Data.Time.Clock
import Control.Monad
import Data.Maybe
import qualified Data.Text as T
import qualified Data.Conduit.List as CL
share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
Associated
key Key
file SFilePath
KeyFileIndex key file
FileKeyIndex file key
Content
key Key
cache InodeCache
KeyCacheIndex key cache
|]
containedTable :: TableName
containedTable = "content"
createTables :: SqlPersistM ()
createTables = void $ runMigrationSilent migrateKeysDb
newtype ReadHandle = ReadHandle H.DbQueue
readDb :: SqlPersistM a -> ReadHandle -> IO a
readDb a (ReadHandle h) = H.queryDbQueue h a
newtype WriteHandle = WriteHandle H.DbQueue
queueDb :: SqlPersistM () -> WriteHandle -> IO ()
queueDb a (WriteHandle h) = H.queueDb h checkcommit a
where
-- commit queue after 1000 changes or 5 minutes, whichever comes first
checkcommit sz lastcommittime
| sz > 1000 = return True
| otherwise = do
now <- getCurrentTime
return $ diffUTCTime now lastcommittime > 300
addAssociatedFile :: Key -> TopFilePath -> WriteHandle -> IO ()
addAssociatedFile k f = queueDb $ do
-- If the same file was associated with a different key before,
-- remove that.
deleteWhere [AssociatedFile ==. af, AssociatedKey !=. k]
void $ insertUnique $ Associated k af
where
af = toSFilePath (getTopFilePath f)
-- Does not remove any old association for a file, but less expensive
-- than addAssociatedFile. Calling dropAllAssociatedFiles first and then
-- this is an efficient way to update all associated files.
addAssociatedFileFast :: Key -> TopFilePath -> WriteHandle -> IO ()
addAssociatedFileFast k f = queueDb $ void $ insertUnique $ Associated k af
where
af = toSFilePath (getTopFilePath f)
dropAllAssociatedFiles :: WriteHandle -> IO ()
dropAllAssociatedFiles = queueDb $
deleteWhere ([] :: [Filter Associated])
{- Note that the files returned were once associated with the key, but
- some of them may not be any longer. -}
getAssociatedFiles :: Key -> ReadHandle -> IO [TopFilePath]
getAssociatedFiles k = readDb $ do
l <- selectList [AssociatedKey ==. k] []
return $ map (asTopFilePath . fromSFilePath . associatedFile . entityVal) l
{- Gets any keys that are on record as having a particular associated file.
- (Should be one or none but the database doesn't enforce that.) -}
getAssociatedKey :: TopFilePath -> ReadHandle -> IO [Key]
getAssociatedKey f = readDb $ do
l <- selectList [AssociatedFile ==. af] []
return $ map (associatedKey . entityVal) l
where
af = toSFilePath (getTopFilePath f)
removeAssociatedFile :: Key -> TopFilePath -> WriteHandle -> IO ()
removeAssociatedFile k f = queueDb $
deleteWhere [AssociatedKey ==. k, AssociatedFile ==. af]
where
af = toSFilePath (getTopFilePath f)
addInodeCaches :: Key -> [InodeCache] -> WriteHandle -> IO ()
addInodeCaches k is = queueDb $
forM_ is $ \i -> insertUnique $ Content k i
{- A key may have multiple InodeCaches; one for the annex object, and one
- for each pointer file that is a copy of it. -}
getInodeCaches :: Key -> ReadHandle -> IO [InodeCache]
getInodeCaches k = readDb $ do
l <- selectList [ContentKey ==. k] []
return $ map (contentCache . entityVal) l
removeInodeCaches :: Key -> WriteHandle -> IO ()
removeInodeCaches k = queueDb $
deleteWhere [ContentKey ==. k]
{- Check if the inode is known to be used for an annexed file.
-
- This is currently slow due to the lack of indexes.
-}
isInodeKnown :: InodeCache -> SentinalStatus -> ReadHandle -> IO Bool
isInodeKnown i s = readDb query
where
query
| sentinalInodesChanged s =
withRawQuery likesql [] $ isJust <$> CL.head
| otherwise =
isJust <$> selectFirst [ContentCache ==. i] []
likesql = T.concat
[ "SELECT key FROM content WHERE "
, T.intercalate " OR " $ map mklike (likeInodeCacheWeak i)
, " LIMIT 1"
]
mklike p = T.concat
[ "cache LIKE "
, "'"
, T.pack p
, "'"
]