change keys database to use IKey type with more efficient serialization

This breaks any existing keys database!

IKey serializes more efficiently than SKey, although this limits the
use of its Read/Show instances.

This makes the keys database use less disk space, and so should be a win.

Updated benchmark:

benchmarking keys database/getAssociatedFiles from 1000 (hit)
time                 64.04 μs   (63.95 μs .. 64.13 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 64.02 μs   (63.96 μs .. 64.08 μs)
std dev              218.2 ns   (172.5 ns .. 299.3 ns)

benchmarking keys database/getAssociatedFiles from 1000 (miss)
time                 52.53 μs   (52.18 μs .. 53.21 μs)
                     0.999 R²   (0.998 R² .. 1.000 R²)
mean                 52.31 μs   (52.18 μs .. 52.91 μs)
std dev              734.6 ns   (206.2 ns .. 1.623 μs)

benchmarking keys database/getAssociatedKey from 1000 (hit)
time                 64.60 μs   (64.46 μs .. 64.77 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 64.74 μs   (64.57 μs .. 65.20 μs)
std dev              900.2 ns   (389.7 ns .. 1.733 μs)

benchmarking keys database/getAssociatedKey from 1000 (miss)
time                 52.46 μs   (52.29 μs .. 52.68 μs)
                     1.000 R²   (0.999 R² .. 1.000 R²)
mean                 52.63 μs   (52.35 μs .. 53.37 μs)
std dev              1.362 μs   (562.7 ns .. 2.608 μs)
variance introduced by outliers: 24% (moderately inflated)

benchmarking keys database/addAssociatedFile to 1000 (old)
time                 487.3 μs   (484.7 μs .. 490.1 μs)
                     1.000 R²   (0.999 R² .. 1.000 R²)
mean                 490.9 μs   (487.8 μs .. 496.5 μs)
std dev              13.95 μs   (6.841 μs .. 22.03 μs)
variance introduced by outliers: 20% (moderately inflated)

benchmarking keys database/addAssociatedFile to 1000 (new)
time                 6.633 ms   (5.741 ms .. 7.751 ms)
                     0.905 R²   (0.850 R² .. 0.965 R²)
mean                 8.252 ms   (7.803 ms .. 8.602 ms)
std dev              1.126 ms   (900.3 μs .. 1.430 ms)
variance introduced by outliers: 72% (severely inflated)

benchmarking keys database/getAssociatedFiles from 10000 (hit)
time                 65.36 μs   (64.71 μs .. 66.37 μs)
                     0.998 R²   (0.995 R² .. 1.000 R²)
mean                 65.28 μs   (64.72 μs .. 66.45 μs)
std dev              2.576 μs   (920.8 ns .. 4.122 μs)
variance introduced by outliers: 42% (moderately inflated)

benchmarking keys database/getAssociatedFiles from 10000 (miss)
time                 52.34 μs   (52.25 μs .. 52.45 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 52.49 μs   (52.42 μs .. 52.59 μs)
std dev              255.4 ns   (205.8 ns .. 312.9 ns)

benchmarking keys database/getAssociatedKey from 10000 (hit)
time                 64.76 μs   (64.67 μs .. 64.84 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 64.67 μs   (64.62 μs .. 64.72 μs)
std dev              177.3 ns   (148.1 ns .. 217.1 ns)

benchmarking keys database/getAssociatedKey from 10000 (miss)
time                 52.75 μs   (52.66 μs .. 52.82 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 52.69 μs   (52.63 μs .. 52.75 μs)
std dev              210.6 ns   (173.7 ns .. 265.9 ns)

benchmarking keys database/addAssociatedFile to 10000 (old)
time                 489.7 μs   (488.7 μs .. 490.7 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 490.4 μs   (489.6 μs .. 492.2 μs)
std dev              3.990 μs   (2.435 μs .. 7.604 μs)

benchmarking keys database/addAssociatedFile to 10000 (new)
time                 9.994 ms   (9.186 ms .. 10.74 ms)
                     0.959 R²   (0.928 R² .. 0.979 R²)
mean                 9.906 ms   (9.343 ms .. 10.40 ms)
std dev              1.384 ms   (1.051 ms .. 2.100 ms)
variance introduced by outliers: 69% (severely inflated)
This commit is contained in:
Joey Hess 2016-01-12 14:01:50 -04:00
parent 789fc32af0
commit 423fffcd41
Failed to extract signature
4 changed files with 53 additions and 35 deletions

View file

@ -87,13 +87,13 @@ populateAssociatedFiles h num = do
SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h) SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h)
H.flushDbQueue h H.flushDbQueue h
keyN :: Int -> SKey keyN :: Int -> IKey
keyN n = SKey ("key" ++ show n) keyN n = IKey ("key" ++ show n)
fileN :: Int -> TopFilePath fileN :: Int -> TopFilePath
fileN n = asTopFilePath ("file" ++ show n) fileN n = asTopFilePath ("file" ++ show n)
keyMiss :: SKey keyMiss :: IKey
keyMiss = keyN 0 -- 0 is never stored keyMiss = keyN 0 -- 0 is never stored
fileMiss :: TopFilePath fileMiss :: TopFilePath
@ -114,8 +114,8 @@ benchDb tmpdir num = do
instance NFData TopFilePath where instance NFData TopFilePath where
rnf = rnf . getTopFilePath rnf = rnf . getTopFilePath
instance NFData SKey where instance NFData IKey where
rnf (SKey s) = rnf s rnf (IKey s) = rnf s
-- can't use Criterion's defaultMain here because it looks at -- can't use Criterion's defaultMain here because it looks at
-- command-line parameters -- command-line parameters

View file

@ -133,20 +133,20 @@ openDb createdb _ = withExclusiveLock gitAnnexKeysDbLock $ do
open db = liftIO $ DbOpen <$> H.openDbQueue db SQL.containedTable open db = liftIO $ DbOpen <$> H.openDbQueue db SQL.containedTable
addAssociatedFile :: Key -> TopFilePath -> Annex () addAssociatedFile :: Key -> TopFilePath -> Annex ()
addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile (toSKey k) f addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile (toIKey k) f
{- Note that the files returned were once associated with the key, but {- Note that the files returned were once associated with the key, but
- some of them may not be any longer. -} - some of them may not be any longer. -}
getAssociatedFiles :: Key -> Annex [TopFilePath] getAssociatedFiles :: Key -> Annex [TopFilePath]
getAssociatedFiles = runReaderIO . SQL.getAssociatedFiles . toSKey getAssociatedFiles = runReaderIO . SQL.getAssociatedFiles . toIKey
{- Gets any keys that are on record as having a particular associated file. {- Gets any keys that are on record as having a particular associated file.
- (Should be one or none but the database doesn't enforce that.) -} - (Should be one or none but the database doesn't enforce that.) -}
getAssociatedKey :: TopFilePath -> Annex [Key] getAssociatedKey :: TopFilePath -> Annex [Key]
getAssociatedKey = map fromSKey <$$> runReaderIO . SQL.getAssociatedKey getAssociatedKey = map fromIKey <$$> runReaderIO . SQL.getAssociatedKey
removeAssociatedFile :: Key -> TopFilePath -> Annex () removeAssociatedFile :: Key -> TopFilePath -> Annex ()
removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile (toSKey k) removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile (toIKey k)
{- Find all unlocked associated files. This is expensive, and so normally {- Find all unlocked associated files. This is expensive, and so normally
- the associated files are updated incrementally when changes are noticed. -} - the associated files are updated incrementally when changes are noticed. -}
@ -168,7 +168,7 @@ scanAssociatedFiles = whenM (isJust <$> inRepo Git.Branch.current) $
isregfile i = Git.Types.toBlobType (Git.LsTree.mode i) == Just Git.Types.FileBlob isregfile i = Git.Types.toBlobType (Git.LsTree.mode i) == Just Git.Types.FileBlob
add h i k = liftIO $ flip SQL.queueDb h $ add h i k = liftIO $ flip SQL.queueDb h $
void $ insertUnique $ SQL.Associated void $ insertUnique $ SQL.Associated
(toSKey k) (toIKey k)
(getTopFilePath $ Git.LsTree.file i) (getTopFilePath $ Git.LsTree.file i)
{- Stats the files, and stores their InodeCaches. -} {- Stats the files, and stores their InodeCaches. -}
@ -177,12 +177,12 @@ storeInodeCaches k fs = withTSDelta $ \d ->
addInodeCaches k . catMaybes =<< liftIO (mapM (`genInodeCache` d) fs) addInodeCaches k . catMaybes =<< liftIO (mapM (`genInodeCache` d) fs)
addInodeCaches :: Key -> [InodeCache] -> Annex () addInodeCaches :: Key -> [InodeCache] -> Annex ()
addInodeCaches k is = runWriterIO $ SQL.addInodeCaches (toSKey k) is addInodeCaches k is = runWriterIO $ SQL.addInodeCaches (toIKey k) is
{- A key may have multiple InodeCaches; one for the annex object, and one {- A key may have multiple InodeCaches; one for the annex object, and one
- for each pointer file that is a copy of it. -} - for each pointer file that is a copy of it. -}
getInodeCaches :: Key -> Annex [InodeCache] getInodeCaches :: Key -> Annex [InodeCache]
getInodeCaches = runReaderIO . SQL.getInodeCaches . toSKey getInodeCaches = runReaderIO . SQL.getInodeCaches . toIKey
removeInodeCaches :: Key -> Annex () removeInodeCaches :: Key -> Annex ()
removeInodeCaches = runWriterIO . SQL.removeInodeCaches . toSKey removeInodeCaches = runWriterIO . SQL.removeInodeCaches . toIKey

View file

@ -25,12 +25,12 @@ import Control.Monad
share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase| share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
Associated Associated
key SKey key IKey
file FilePath file FilePath
KeyFileIndex key file KeyFileIndex key file
FileKeyIndex file key FileKeyIndex file key
Content Content
key SKey key IKey
cache SInodeCache cache SInodeCache
KeyCacheIndex key cache KeyCacheIndex key cache
|] |]
@ -58,51 +58,51 @@ queueDb a (WriteHandle h) = H.queueDb h checkcommit a
now <- getCurrentTime now <- getCurrentTime
return $ diffUTCTime lastcommittime now > 300 return $ diffUTCTime lastcommittime now > 300
addAssociatedFile :: SKey -> TopFilePath -> WriteHandle -> IO () addAssociatedFile :: IKey -> TopFilePath -> WriteHandle -> IO ()
addAssociatedFile sk f = queueDb $ do addAssociatedFile ik f = queueDb $ do
-- If the same file was associated with a different key before, -- If the same file was associated with a different key before,
-- remove that. -- remove that.
delete $ from $ \r -> do delete $ from $ \r -> do
where_ (r ^. AssociatedFile ==. val (getTopFilePath f) &&. not_ (r ^. AssociatedKey ==. val sk)) where_ (r ^. AssociatedFile ==. val (getTopFilePath f) &&. not_ (r ^. AssociatedKey ==. val ik))
void $ insertUnique $ Associated sk (getTopFilePath f) void $ insertUnique $ Associated ik (getTopFilePath f)
{- Note that the files returned were once associated with the key, but {- Note that the files returned were once associated with the key, but
- some of them may not be any longer. -} - some of them may not be any longer. -}
getAssociatedFiles :: SKey -> ReadHandle -> IO [TopFilePath] getAssociatedFiles :: IKey -> ReadHandle -> IO [TopFilePath]
getAssociatedFiles sk = readDb $ do getAssociatedFiles ik = readDb $ do
l <- select $ from $ \r -> do l <- select $ from $ \r -> do
where_ (r ^. AssociatedKey ==. val sk) where_ (r ^. AssociatedKey ==. val ik)
return (r ^. AssociatedFile) return (r ^. AssociatedFile)
return $ map (asTopFilePath . unValue) l return $ map (asTopFilePath . unValue) l
{- Gets any keys that are on record as having a particular associated file. {- Gets any keys that are on record as having a particular associated file.
- (Should be one or none but the database doesn't enforce that.) -} - (Should be one or none but the database doesn't enforce that.) -}
getAssociatedKey :: TopFilePath -> ReadHandle -> IO [SKey] getAssociatedKey :: TopFilePath -> ReadHandle -> IO [IKey]
getAssociatedKey f = readDb $ do getAssociatedKey f = readDb $ do
l <- select $ from $ \r -> do l <- select $ from $ \r -> do
where_ (r ^. AssociatedFile ==. val (getTopFilePath f)) where_ (r ^. AssociatedFile ==. val (getTopFilePath f))
return (r ^. AssociatedKey) return (r ^. AssociatedKey)
return $ map unValue l return $ map unValue l
removeAssociatedFile :: SKey -> TopFilePath -> WriteHandle -> IO () removeAssociatedFile :: IKey -> TopFilePath -> WriteHandle -> IO ()
removeAssociatedFile sk f = queueDb $ removeAssociatedFile ik f = queueDb $
delete $ from $ \r -> do delete $ from $ \r -> do
where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val (getTopFilePath f)) where_ (r ^. AssociatedKey ==. val ik &&. r ^. AssociatedFile ==. val (getTopFilePath f))
addInodeCaches :: SKey -> [InodeCache] -> WriteHandle -> IO () addInodeCaches :: IKey -> [InodeCache] -> WriteHandle -> IO ()
addInodeCaches sk is = queueDb $ addInodeCaches ik is = queueDb $
forM_ is $ \i -> insertUnique $ Content sk (toSInodeCache i) forM_ is $ \i -> insertUnique $ Content ik (toSInodeCache i)
{- A key may have multiple InodeCaches; one for the annex object, and one {- A key may have multiple InodeCaches; one for the annex object, and one
- for each pointer file that is a copy of it. -} - for each pointer file that is a copy of it. -}
getInodeCaches :: SKey -> ReadHandle -> IO [InodeCache] getInodeCaches :: IKey -> ReadHandle -> IO [InodeCache]
getInodeCaches sk = readDb $ do getInodeCaches ik = readDb $ do
l <- select $ from $ \r -> do l <- select $ from $ \r -> do
where_ (r ^. ContentKey ==. val sk) where_ (r ^. ContentKey ==. val ik)
return (r ^. ContentCache) return (r ^. ContentCache)
return $ map (fromSInodeCache. unValue) l return $ map (fromSInodeCache. unValue) l
removeInodeCaches :: SKey -> WriteHandle -> IO () removeInodeCaches :: IKey -> WriteHandle -> IO ()
removeInodeCaches sk = queueDb $ removeInodeCaches ik = queueDb $
delete $ from $ \r -> do delete $ from $ \r -> do
where_ (r ^. ContentKey ==. val sk) where_ (r ^. ContentKey ==. val ik)

View file

@ -27,6 +27,24 @@ fromSKey (SKey s) = fromMaybe (error $ "bad serialied Key " ++ s) (file2key s)
derivePersistField "SKey" derivePersistField "SKey"
-- A Key index. More efficient than SKey, but its Read instance does not
-- work when it's used in any kind of complex data structure.
newtype IKey = IKey String
instance Read IKey where
readsPrec _ s = [(IKey s, "")]
instance Show IKey where
show (IKey s) = s
toIKey :: Key -> IKey
toIKey = IKey . key2file
fromIKey :: IKey -> Key
fromIKey (IKey s) = fromMaybe (error $ "bad serialied Key " ++ s) (file2key s)
derivePersistField "IKey"
-- A serialized InodeCache -- A serialized InodeCache
newtype SInodeCache = I String newtype SInodeCache = I String
deriving (Show, Read) deriving (Show, Read)