eliminate raw sql LIKE query

This commit is contained in:
Joey Hess 2019-10-30 15:16:03 -04:00
parent 09c7cbbaa8
commit 4940a135af
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 67 additions and 43 deletions

View file

@ -24,8 +24,6 @@ import Database.Persist.TH
import Data.Time.Clock import Data.Time.Clock
import Control.Monad import Control.Monad
import Data.Maybe import Data.Maybe
import qualified Data.Text as T
import qualified Data.Conduit.List as CL
-- Note on indexes: KeyFileIndex etc are really uniqueness constraints, -- Note on indexes: KeyFileIndex etc are really uniqueness constraints,
-- which cause sqlite to automatically add indexes. So when adding indexes, -- which cause sqlite to automatically add indexes. So when adding indexes,
@ -51,6 +49,8 @@ Associated
Content Content
key Key key Key
inodecache InodeCache inodecache InodeCache
filesize FileSize
mtime EpochTime
KeyInodeCacheIndex key inodecache KeyInodeCacheIndex key inodecache
InodeCacheKeyIndex inodecache key InodeCacheKeyIndex inodecache key
|] |]
@ -123,7 +123,9 @@ removeAssociatedFile k f = queueDb $
addInodeCaches :: Key -> [InodeCache] -> WriteHandle -> IO () addInodeCaches :: Key -> [InodeCache] -> WriteHandle -> IO ()
addInodeCaches k is = queueDb $ addInodeCaches k is = queueDb $
forM_ is $ \i -> insertUnique $ Content k i forM_ is $ \i -> insertUnique $ Content k i
(inodeCacheToFileSize i)
(inodeCacheToEpochTime i)
{- A key may have multiple InodeCaches; one for the annex object, and one {- A key may have multiple InodeCaches; one for the annex object, and one
- for each pointer file that is a copy of it. -} - for each pointer file that is a copy of it. -}
@ -136,28 +138,19 @@ removeInodeCaches :: Key -> WriteHandle -> IO ()
removeInodeCaches k = queueDb $ removeInodeCaches k = queueDb $
deleteWhere [ContentKey ==. k] deleteWhere [ContentKey ==. k]
{- Check if the inode is known to be used for an annexed file. {- Check if the inode is known to be used for an annexed file. -}
-
- This is currently slow due to the lack of indexes.
-}
isInodeKnown :: InodeCache -> SentinalStatus -> ReadHandle -> IO Bool isInodeKnown :: InodeCache -> SentinalStatus -> ReadHandle -> IO Bool
isInodeKnown i s = readDb query isInodeKnown i s = readDb (isJust <$> selectFirst q [])
where where
query q
| sentinalInodesChanged s = | sentinalInodesChanged s =
withRawQuery likesql [] $ isJust <$> CL.head -- Note that this select is intentionally not
| otherwise = -- indexed. Normally, the inodes have not changed,
isJust <$> selectFirst [ContentInodecache ==. i] [] -- and it would be unncessary work to maintain
-- indexes for the unusual case.
likesql = T.concat [ ContentFilesize ==. inodeCacheToFileSize i
[ "SELECT key FROM content WHERE " , ContentMtime >=. tmin
, T.intercalate " OR " $ map mklike (likeInodeCacheWeak i) , ContentMtime <=. tmax
, " LIMIT 1" ]
] | otherwise = [ContentInodecache ==. i]
(tmin, tmax) = inodeCacheEpochTimeRange i
mklike p = T.concat
[ "cache LIKE "
, "'"
, T.pack p
, "'"
]

View file

@ -6,10 +6,13 @@
-} -}
{-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TypeSynonymInstances #-}
module Database.Types ( module Database.Types (
module Database.Types, module Database.Types,
Key, Key,
EpochTime,
FileSize,
) where ) where
import Database.Persist.Class hiding (Key) import Database.Persist.Class hiding (Key)
@ -18,9 +21,14 @@ import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Attoparsec.ByteString as A import qualified Data.Attoparsec.ByteString as A
import System.PosixCompat.Types
import Data.Int
import Data.Text.Read
import Foreign.C.Types
import Key import Key
import Utility.InodeCache import Utility.InodeCache
import Utility.FileSize
import Utility.FileSystemEncoding import Utility.FileSystemEncoding
import Git.Types import Git.Types
import Types.UUID import Types.UUID
@ -105,3 +113,26 @@ instance PersistField SSha where
instance PersistFieldSql SSha where instance PersistFieldSql SSha where
sqlType _ = SqlString sqlType _ = SqlString
-- A FileSize could be stored as an Int64, but some systems could
-- conceivably have a larger filesize, and no math is ever done with them
-- in sqlite, so store a string instead.
instance PersistField FileSize where
toPersistValue = toPersistValue . show
fromPersistValue v = fromPersistValue v >>= parse
where
parse = either (Left . T.pack) (Right . fst) . decimal
instance PersistFieldSql FileSize where
sqlType _ = SqlString
-- Store EpochTime as an Int64, to allow selecting values in a range.
instance PersistField EpochTime where
toPersistValue (CTime t) = toPersistValue (fromIntegral t :: Int64)
fromPersistValue v = CTime . fromIntegral <$> go
where
go :: Either T.Text Int64
go = fromPersistValue v
instance PersistFieldSql EpochTime where
sqlType _ = SqlInt64

View file

@ -1,7 +1,7 @@
{- Caching a file's inode, size, and modification time {- Caching a file's inode, size, and modification time
- to see when it's changed. - to see when it's changed.
- -
- Copyright 2013-2018 Joey Hess <id@joeyh.name> - Copyright 2013-2019 Joey Hess <id@joeyh.name>
- -
- License: BSD-2-clause - License: BSD-2-clause
-} -}
@ -23,11 +23,13 @@ module Utility.InodeCache (
showInodeCache, showInodeCache,
genInodeCache, genInodeCache,
toInodeCache, toInodeCache,
likeInodeCacheWeak,
InodeCacheKey, InodeCacheKey,
inodeCacheToKey, inodeCacheToKey,
inodeCacheToFileSize,
inodeCacheToMtime, inodeCacheToMtime,
inodeCacheToEpochTime,
inodeCacheEpochTimeRange,
SentinalFile(..), SentinalFile(..),
SentinalStatus(..), SentinalStatus(..),
@ -101,9 +103,21 @@ instance Eq InodeCacheKey where
inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey
inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim
inodeCacheToFileSize :: InodeCache -> FileSize
inodeCacheToFileSize (InodeCache (InodeCachePrim _ sz _)) = sz
inodeCacheToMtime :: InodeCache -> POSIXTime inodeCacheToMtime :: InodeCache -> POSIXTime
inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = highResTime mtime inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = highResTime mtime
inodeCacheToEpochTime :: InodeCache -> EpochTime
inodeCacheToEpochTime (InodeCache (InodeCachePrim _ _ mtime)) = lowResTime mtime
-- Returns min, max EpochTime that weakly match the time of the InodeCache.
inodeCacheEpochTimeRange :: InodeCache -> (EpochTime, EpochTime)
inodeCacheEpochTimeRange i =
let t = inodeCacheToEpochTime i
in (t-1, t+1)
{- For backwards compatability, support low-res mtime with no {- For backwards compatability, support low-res mtime with no
- fractional seconds. -} - fractional seconds. -}
data MTime = MTimeLowRes EpochTime | MTimeHighRes POSIXTime data MTime = MTimeLowRes EpochTime | MTimeHighRes POSIXTime
@ -150,22 +164,6 @@ showInodeCache (InodeCache (InodeCachePrim inode size (MTimeLowRes mtime))) =
, show mtime , show mtime
] ]
-- Generates patterns that can be used in a SQL LIKE query to match
-- serialized inode caches that are weakly the same as the provided
-- InodeCache.
--
-- Like compareWeak, the size has to match, while the mtime can differ
-- by anything less than 2 seconds.
likeInodeCacheWeak :: InodeCache -> [String]
likeInodeCacheWeak (InodeCache (InodeCachePrim _ size mtime)) =
lowresl ++ highresl
where
lowresl = map mkpat [t, t+1, t-1]
highresl = map (++ " %") lowresl
t = lowResTime mtime
mkpat t' = "% " ++ ssz ++ " " ++ show t'
ssz = show size
readInodeCache :: String -> Maybe InodeCache readInodeCache :: String -> Maybe InodeCache
readInodeCache s = case words s of readInodeCache s = case words s of
(inode:size:mtime:[]) -> do (inode:size:mtime:[]) -> do

View file

@ -10,6 +10,8 @@ process.
Eg, store the size and allowable mtimes in a separate table that is Eg, store the size and allowable mtimes in a separate table that is
queried when necessary. queried when necessary.
Fixed.
* Several selects were not able to use indexes, so would be slow. * Several selects were not able to use indexes, so would be slow.
Fixed by adding indexes. Fixed by adding indexes.