diff --git a/Database/Keys.hs b/Database/Keys.hs index e60724c2c1..c31f647c09 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -1,6 +1,6 @@ {- Sqlite database of information about Keys - - - Copyright 2015-2018 Joey Hess + - Copyright 2015-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -19,6 +19,7 @@ module Database.Keys ( addInodeCaches, getInodeCaches, removeInodeCaches, + isInodeKnown, runWriter, ) where @@ -187,6 +188,9 @@ getInodeCaches = runReaderIO . SQL.getInodeCaches . toIKey removeInodeCaches :: Key -> Annex () removeInodeCaches = runWriterIO . SQL.removeInodeCaches . toIKey +isInodeKnown :: InodeCache -> SentinalStatus -> Annex Bool +isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s) + {- Looks at staged changes to find when unlocked files are copied/moved, - and updates associated files in the keys database. - diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs index 019990f926..7b5bb5feb6 100644 --- a/Database/Keys/SQL.hs +++ b/Database/Keys/SQL.hs @@ -1,6 +1,6 @@ {- Sqlite database of information about Keys - - - Copyright 2015-2016 Joey Hess + - Copyright 2015-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -23,6 +23,9 @@ import Database.Persist.Sql import Database.Persist.TH import Data.Time.Clock import Control.Monad +import Data.Maybe +import qualified Data.Text as T +import qualified Data.Conduit.List as CL share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase| Associated @@ -116,3 +119,31 @@ getInodeCaches ik = readDb $ do removeInodeCaches :: IKey -> WriteHandle -> IO () removeInodeCaches ik = queueDb $ deleteWhere [ContentKey ==. ik] + +{- Check if the inode is known to be used for an annexed file. + - + - This is currently slow due to the lack of indexes. When the sentinal reports that inodes have changed, a row matches if any one of the weak LIKE patterns from likeInodeCacheWeak matches (the patterns are ORed together).
+ -} +isInodeKnown :: InodeCache -> SentinalStatus -> ReadHandle -> IO Bool +isInodeKnown i s = readDb query + where + query + | sentinalInodesChanged s = + withRawQuery likesql [] $ isJust <$> CL.head + | otherwise = + isJust <$> selectFirst [ContentCache ==. si] [] + + si = toSInodeCache i + + likesql = T.concat + [ "SELECT key FROM content WHERE " + , T.intercalate " OR " (map mklike (likeInodeCacheWeak i)) + , " LIMIT 1" + ] + + mklike p = T.concat + [ "cache LIKE " + , "'I \"" -- SInodeCache serializes as I "..." + , T.pack p + , "\"'" + ] diff --git a/Utility/InodeCache.hs b/Utility/InodeCache.hs index 7d2959d8ff..0eac8a3018 100644 --- a/Utility/InodeCache.hs +++ b/Utility/InodeCache.hs @@ -23,6 +23,7 @@ module Utility.InodeCache ( showInodeCache, genInodeCache, toInodeCache, + likeInodeCacheWeak, InodeCacheKey, inodeCacheToKey, @@ -149,6 +150,22 @@ showInodeCache (InodeCache (InodeCachePrim inode size (MTimeLowRes mtime))) = , show mtime ] +-- Generates patterns that can be used in a SQL LIKE query to match +-- serialized inode caches that are weakly the same as the provided +-- InodeCache. +-- +-- Like compareWeak, the size has to match, while the mtime can differ +-- by anything less than 2 seconds. +likeInodeCacheWeak :: InodeCache -> [String] +likeInodeCacheWeak (InodeCache (InodeCachePrim _ size mtime)) = + lowresl ++ highresl + where + lowresl = map mkpat [t, t+1, t-1] + highresl = map (++ " %") lowresl + t = lowResTime mtime + mkpat t' = "% " ++ ssz ++ " " ++ show t' + ssz = show size + readInodeCache :: String -> Maybe InodeCache readInodeCache s = case words s of (inode:size:mtime:[]) -> do diff --git a/doc/todo/sqlite_database_improvements.mdwn b/doc/todo/sqlite_database_improvements.mdwn index fdb64738b8..62bd6e8622 100644 --- a/doc/todo/sqlite_database_improvements.mdwn +++ b/doc/todo/sqlite_database_improvements.mdwn @@ -2,6 +2,17 @@ Collection of non-ideal things about git-annex's use of sqlite databases. 
Would be good to improve these sometime, but it would need a migration process. +* Database.Keys.SQL.isInodeKnown seems likely to get very slow + when there are a lot of unlocked annexed files. It needs + an index in the database, eg "InodeIndex cache". + + It also has to do some really ugly SQL LIKE queries. Probably an index + would not speed them up. They're only needed when git-annex detects + inodes are not stable, eg on FAT or probably Windows. A better database + schema should be able to eliminate the need for those LIKE queries. + Eg, store the size and allowable mtimes in a separate table that is + queried when necessary. + * Database.Export.getExportedKey would be faster if there was an index in the database, eg "ExportedIndex file key". This only affects the speed of `git annex export`, which is probably swamped by the actual