eliminate raw sql LIKE query

This commit is contained in:
Joey Hess 2019-10-30 15:16:03 -04:00
parent 09c7cbbaa8
commit 4940a135af
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 67 additions and 43 deletions

View file

@ -24,8 +24,6 @@ import Database.Persist.TH
import Data.Time.Clock
import Control.Monad
import Data.Maybe
import qualified Data.Text as T
import qualified Data.Conduit.List as CL
-- Note on indexes: KeyFileIndex etc are really uniqueness constraints,
-- which cause sqlite to automatically add indexes. So when adding indexes,
@ -51,6 +49,8 @@ Associated
Content
key Key
inodecache InodeCache
filesize FileSize
mtime EpochTime
KeyInodeCacheIndex key inodecache
InodeCacheKeyIndex inodecache key
|]
@ -123,7 +123,9 @@ removeAssociatedFile k f = queueDb $
addInodeCaches :: Key -> [InodeCache] -> WriteHandle -> IO ()
addInodeCaches k is = queueDb $
forM_ is $ \i -> insertUnique $ Content k i
forM_ is $ \i -> insertUnique $ Content k i
(inodeCacheToFileSize i)
(inodeCacheToEpochTime i)
{- A key may have multiple InodeCaches; one for the annex object, and one
- for each pointer file that is a copy of it. -}
@ -136,28 +138,19 @@ removeInodeCaches :: Key -> WriteHandle -> IO ()
removeInodeCaches k = queueDb $
deleteWhere [ContentKey ==. k]
{- Check if the inode is known to be used for an annexed file.
-
- This is currently slow due to the lack of indexes.
-}
{- Check if the inode is known to be used for an annexed file. -}
isInodeKnown :: InodeCache -> SentinalStatus -> ReadHandle -> IO Bool
isInodeKnown i s = readDb query
isInodeKnown i s = readDb (isJust <$> selectFirst q [])
where
query
q
| sentinalInodesChanged s =
withRawQuery likesql [] $ isJust <$> CL.head
| otherwise =
isJust <$> selectFirst [ContentInodecache ==. i] []
likesql = T.concat
[ "SELECT key FROM content WHERE "
, T.intercalate " OR " $ map mklike (likeInodeCacheWeak i)
, " LIMIT 1"
]
mklike p = T.concat
[ "cache LIKE "
, "'"
, T.pack p
, "'"
]
-- Note that this select is intentionally not
-- indexed. Normally, the inodes have not changed,
-- and it would be unncessary work to maintain
-- indexes for the unusual case.
[ ContentFilesize ==. inodeCacheToFileSize i
, ContentMtime >=. tmin
, ContentMtime <=. tmax
]
| otherwise = [ContentInodecache ==. i]
(tmin, tmax) = inodeCacheEpochTimeRange i

View file

@ -6,10 +6,13 @@
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TypeSynonymInstances #-}
module Database.Types (
module Database.Types,
Key,
EpochTime,
FileSize,
) where
import Database.Persist.Class hiding (Key)
@ -18,9 +21,14 @@ import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Text as T
import qualified Data.Attoparsec.ByteString as A
import System.PosixCompat.Types
import Data.Int
import Data.Text.Read
import Foreign.C.Types
import Key
import Utility.InodeCache
import Utility.FileSize
import Utility.FileSystemEncoding
import Git.Types
import Types.UUID
@ -105,3 +113,26 @@ instance PersistField SSha where
instance PersistFieldSql SSha where
sqlType _ = SqlString
-- A FileSize could be stored as an Int64, but some systems could
-- conceivably have a larger filesize, and no math is ever done with them
-- in sqlite, so store a string instead.
instance PersistField FileSize where
toPersistValue = toPersistValue . show
fromPersistValue v = fromPersistValue v >>= parse
where
parse = either (Left . T.pack) (Right . fst) . decimal
instance PersistFieldSql FileSize where
sqlType _ = SqlString
-- Store EpochTime as an Int64, to allow selecting values in a range.
instance PersistField EpochTime where
toPersistValue (CTime t) = toPersistValue (fromIntegral t :: Int64)
fromPersistValue v = CTime . fromIntegral <$> go
where
go :: Either T.Text Int64
go = fromPersistValue v
instance PersistFieldSql EpochTime where
sqlType _ = SqlInt64

View file

@ -1,7 +1,7 @@
{- Caching a file's inode, size, and modification time
- to see when it's changed.
-
- Copyright 2013-2018 Joey Hess <id@joeyh.name>
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
@ -23,11 +23,13 @@ module Utility.InodeCache (
showInodeCache,
genInodeCache,
toInodeCache,
likeInodeCacheWeak,
InodeCacheKey,
inodeCacheToKey,
inodeCacheToFileSize,
inodeCacheToMtime,
inodeCacheToEpochTime,
inodeCacheEpochTimeRange,
SentinalFile(..),
SentinalStatus(..),
@ -101,9 +103,21 @@ instance Eq InodeCacheKey where
inodeCacheToKey :: InodeComparisonType -> InodeCache -> InodeCacheKey
inodeCacheToKey ct (InodeCache prim) = InodeCacheKey ct prim
inodeCacheToFileSize :: InodeCache -> FileSize
inodeCacheToFileSize (InodeCache (InodeCachePrim _ sz _)) = sz
inodeCacheToMtime :: InodeCache -> POSIXTime
inodeCacheToMtime (InodeCache (InodeCachePrim _ _ mtime)) = highResTime mtime
inodeCacheToEpochTime :: InodeCache -> EpochTime
inodeCacheToEpochTime (InodeCache (InodeCachePrim _ _ mtime)) = lowResTime mtime
-- Returns min, max EpochTime that weakly match the time of the InodeCache.
inodeCacheEpochTimeRange :: InodeCache -> (EpochTime, EpochTime)
inodeCacheEpochTimeRange i =
let t = inodeCacheToEpochTime i
in (t-1, t+1)
{- For backwards compatability, support low-res mtime with no
- fractional seconds. -}
data MTime = MTimeLowRes EpochTime | MTimeHighRes POSIXTime
@ -150,22 +164,6 @@ showInodeCache (InodeCache (InodeCachePrim inode size (MTimeLowRes mtime))) =
, show mtime
]
-- Generates patterns that can be used in a SQL LIKE query to match
-- serialized inode caches that are weakly the same as the provided
-- InodeCache.
--
-- Like compareWeak, the size has to match, while the mtime can differ
-- by anything less than 2 seconds.
likeInodeCacheWeak :: InodeCache -> [String]
likeInodeCacheWeak (InodeCache (InodeCachePrim _ size mtime)) =
lowresl ++ highresl
where
lowresl = map mkpat [t, t+1, t-1]
highresl = map (++ " %") lowresl
t = lowResTime mtime
mkpat t' = "% " ++ ssz ++ " " ++ show t'
ssz = show size
readInodeCache :: String -> Maybe InodeCache
readInodeCache s = case words s of
(inode:size:mtime:[]) -> do

View file

@ -10,6 +10,8 @@ process.
Eg, store the size and allowable mtimes in a separate table that is
queried when necessary.
Fixed.
* Several selects were not able to use indexes, so would be slow.
Fixed by adding indexes.