horrible implementation of isInodeKnown

The only good thing about it is it does not require a major version bump
to improve the database. That will need to happen at some point though.

Potentially very very slow in a large repository.

Ugly use of raw sql.
This commit is contained in:
Joey Hess 2019-10-23 14:06:11 -04:00
parent eebf080b33
commit 94efc400e9
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 65 additions and 2 deletions

View file

@ -1,6 +1,6 @@
{- Sqlite database of information about Keys
-
- Copyright 2015-2018 Joey Hess <id@joeyh.name>
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -19,6 +19,7 @@ module Database.Keys (
addInodeCaches,
getInodeCaches,
removeInodeCaches,
isInodeKnown,
runWriter,
) where
@ -187,6 +188,9 @@ getInodeCaches = runReaderIO . SQL.getInodeCaches . toIKey
{- Removes the inode caches recorded for a key: the key is converted
 - with toIKey and SQL.removeInodeCaches is run through runWriterIO. -}
removeInodeCaches :: Key -> Annex ()
removeInodeCaches key = runWriterIO (SQL.removeInodeCaches (toIKey key))
{- Checks if the inode cache is known to the keys database, given the
 - current sentinal status.
 -
 - The Bool from SQL.isInodeKnown is wrapped in a singleton list and
 - collapsed again with `or`; presumably runReaderIO needs a Monoid
 - result, which Bool is not (TODO confirm against runReaderIO). -}
isInodeKnown :: InodeCache -> SentinalStatus -> Annex Bool
isInodeKnown i s = or <$> runReaderIO check
  where
    check h = (:[]) <$> SQL.isInodeKnown i s h
{- Looks at staged changes to find when unlocked files are copied/moved,
- and updates associated files in the keys database.
-

View file

@ -1,6 +1,6 @@
{- Sqlite database of information about Keys
-
- Copyright 2015-2016 Joey Hess <id@joeyh.name>
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -23,6 +23,9 @@ import Database.Persist.Sql
import Database.Persist.TH
import Data.Time.Clock
import Control.Monad
import Data.Maybe
import qualified Data.Text as T
import qualified Data.Conduit.List as CL
share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
Associated
@ -116,3 +119,31 @@ getInodeCaches ik = readDb $ do
{- Hands queueDb a deleteWhere that removes every content row whose
 - key equals the given IKey. -}
removeInodeCaches :: IKey -> WriteHandle -> IO ()
removeInodeCaches ik h = queueDb (deleteWhere [ContentKey ==. ik]) h
{- Check if the inode is known to be used for an annexed file.
 -
 - This is currently slow due to the lack of indexes.
 -
 - When sentinalInodesChanged reports True for the sentinal status, a raw
 - SQL query looks for any row whose serialized cache matches one of the
 - LIKE patterns generated by likeInodeCacheWeak. Otherwise, the
 - serialized inode cache is looked up by equality.
 -}
isInodeKnown :: InodeCache -> SentinalStatus -> ReadHandle -> IO Bool
isInodeKnown i s = readDb query
  where
    query
        | sentinalInodesChanged s =
            withRawQuery likesql [] $ isJust <$> CL.head
        | otherwise =
            isJust <$> selectFirst [ContentCache ==. si] []
    si = toSInodeCache i
    -- The patterns are alternatives, so the LIKE clauses have to be
    -- joined with OR; merely separating them with spaces is not valid
    -- SQL.
    likesql = T.concat
        [ "SELECT key FROM content WHERE "
        , T.intercalate " OR " (map mklike (likeInodeCacheWeak i))
        , " LIMIT 1"
        ]
    mklike p = T.concat
        [ "cache LIKE "
        , "'I \"" -- SInodeCache serializes as I "..."
        , T.pack p
        , "\"'"
        ]

View file

@ -23,6 +23,7 @@ module Utility.InodeCache (
showInodeCache,
genInodeCache,
toInodeCache,
likeInodeCacheWeak,
InodeCacheKey,
inodeCacheToKey,
@ -149,6 +150,22 @@ showInodeCache (InodeCache (InodeCachePrim inode size (MTimeLowRes mtime))) =
, show mtime
]
-- Builds the list of SQL LIKE patterns that match serialized inode
-- caches which are weakly the same as the given InodeCache.
--
-- As with compareWeak, the size must match exactly, while the mtime may
-- differ by anything less than 2 seconds. The inode is ignored; it is
-- covered by a leading wildcard.
--
-- Each pattern is produced twice: once ending at the mtime, and once
-- followed by " %" so that serializations with a further field after
-- the mtime also match.
likeInodeCacheWeak :: InodeCache -> [String]
likeInodeCacheWeak (InodeCache (InodeCachePrim _ size mtime)) =
    exact ++ [pat ++ " %" | pat <- exact]
  where
    base = lowResTime mtime
    exact =
        [ "% " ++ show size ++ " " ++ show candidate
        | candidate <- [base, base + 1, base - 1]
        ]
readInodeCache :: String -> Maybe InodeCache
readInodeCache s = case words s of
(inode:size:mtime:[]) -> do

View file

@ -2,6 +2,17 @@ Collection of non-ideal things about git-annex's use of sqlite databases.
Would be good to improve these sometime, but it would need a migration
process.
* Database.Keys.SQL.isInodeKnown seems likely to get very slow
when there are a lot of unlocked annexed files. It needs
an index in the database, eg "InodeIndex cache".
It also has to do some really ugly SQL LIKE queries. Probably an index
would not speed them up. They're only needed when git-annex detects
that inodes are not stable, eg on FAT or probably Windows. A better database
schema should be able to eliminate the need for those LIKE queries.
Eg, store the size and allowable mtimes in a separate table that is
queried when necessary.
* Database.Export.getExportedKey would be faster if there was an index
in the database, eg "ExportedIndex file key". This only affects
the speed of `git annex export`, which is probably swamped by the actual