From 423fffcd4171cfbb81bc615487a29f0227d652d9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 12 Jan 2016 14:01:50 -0400 Subject: [PATCH] change keys database to use IKey type with more efficient serialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This breaks any existing keys database! IKey serializes more efficiently than SKey, although this limits the use of its Read/Show instances. This makes the keys database use less disk space, and so should be a win. Updated benchmark: benchmarking keys database/getAssociatedFiles from 1000 (hit) time 64.04 μs (63.95 μs .. 64.13 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.02 μs (63.96 μs .. 64.08 μs) std dev 218.2 ns (172.5 ns .. 299.3 ns) benchmarking keys database/getAssociatedFiles from 1000 (miss) time 52.53 μs (52.18 μs .. 53.21 μs) 0.999 R² (0.998 R² .. 1.000 R²) mean 52.31 μs (52.18 μs .. 52.91 μs) std dev 734.6 ns (206.2 ns .. 1.623 μs) benchmarking keys database/getAssociatedKey from 1000 (hit) time 64.60 μs (64.46 μs .. 64.77 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.74 μs (64.57 μs .. 65.20 μs) std dev 900.2 ns (389.7 ns .. 1.733 μs) benchmarking keys database/getAssociatedKey from 1000 (miss) time 52.46 μs (52.29 μs .. 52.68 μs) 1.000 R² (0.999 R² .. 1.000 R²) mean 52.63 μs (52.35 μs .. 53.37 μs) std dev 1.362 μs (562.7 ns .. 2.608 μs) variance introduced by outliers: 24% (moderately inflated) benchmarking keys database/addAssociatedFile to 1000 (old) time 487.3 μs (484.7 μs .. 490.1 μs) 1.000 R² (0.999 R² .. 1.000 R²) mean 490.9 μs (487.8 μs .. 496.5 μs) std dev 13.95 μs (6.841 μs .. 22.03 μs) variance introduced by outliers: 20% (moderately inflated) benchmarking keys database/addAssociatedFile to 1000 (new) time 6.633 ms (5.741 ms .. 7.751 ms) 0.905 R² (0.850 R² .. 0.965 R²) mean 8.252 ms (7.803 ms .. 8.602 ms) std dev 1.126 ms (900.3 μs .. 1.430 ms) variance introduced by outliers: 72% (severely inflated) benchmarking keys database/getAssociatedFiles from 10000 (hit) time 65.36 μs (64.71 μs .. 66.37 μs) 0.998 R² (0.995 R² .. 1.000 R²) mean 65.28 μs (64.72 μs .. 66.45 μs) std dev 2.576 μs (920.8 ns .. 4.122 μs) variance introduced by outliers: 42% (moderately inflated) benchmarking keys database/getAssociatedFiles from 10000 (miss) time 52.34 μs (52.25 μs .. 52.45 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 52.49 μs (52.42 μs .. 52.59 μs) std dev 255.4 ns (205.8 ns .. 312.9 ns) benchmarking keys database/getAssociatedKey from 10000 (hit) time 64.76 μs (64.67 μs .. 64.84 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.67 μs (64.62 μs .. 64.72 μs) std dev 177.3 ns (148.1 ns .. 217.1 ns) benchmarking keys database/getAssociatedKey from 10000 (miss) time 52.75 μs (52.66 μs .. 52.82 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 52.69 μs (52.63 μs .. 52.75 μs) std dev 210.6 ns (173.7 ns .. 265.9 ns) benchmarking keys database/addAssociatedFile to 10000 (old) time 489.7 μs (488.7 μs .. 490.7 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 490.4 μs (489.6 μs .. 492.2 μs) std dev 3.990 μs (2.435 μs .. 7.604 μs) benchmarking keys database/addAssociatedFile to 10000 (new) time 9.994 ms (9.186 ms .. 10.74 ms) 0.959 R² (0.928 R² .. 0.979 R²) mean 9.906 ms (9.343 ms .. 10.40 ms) std dev 1.384 ms (1.051 ms .. 2.100 ms) variance introduced by outliers: 69% (severely inflated) --- Command/Benchmark.hs | 10 +++++----- Database/Keys.hs | 16 ++++++++-------- Database/Keys/SQL.hs | 44 ++++++++++++++++++++++---------------------- Database/Types.hs | 18 ++++++++++++++++++ 4 files changed, 53 insertions(+), 35 deletions(-) diff --git a/Command/Benchmark.hs b/Command/Benchmark.hs index b1cc9a830f..bcfecc2dc1 100644 --- a/Command/Benchmark.hs +++ b/Command/Benchmark.hs @@ -87,13 +87,13 @@ populateAssociatedFiles h num = do SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h) H.flushDbQueue h -keyN :: Int -> SKey -keyN n = SKey ("key" ++ show n) +keyN :: Int -> IKey +keyN n = IKey ("key" ++ show n) fileN :: Int -> TopFilePath fileN n = asTopFilePath ("file" ++ show n) -keyMiss :: SKey +keyMiss :: IKey keyMiss = keyN 0 -- 0 is never stored fileMiss :: TopFilePath @@ -114,8 +114,8 @@ benchDb tmpdir num = do instance NFData TopFilePath where rnf = rnf . getTopFilePath -instance NFData SKey where - rnf (SKey s) = rnf s +instance NFData IKey where + rnf (IKey s) = rnf s -- can't use Criterion's defaultMain here because it looks at -- command-line parameters diff --git a/Database/Keys.hs b/Database/Keys.hs index 89410e741d..fdba05312a 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -133,20 +133,20 @@ openDb createdb _ = withExclusiveLock gitAnnexKeysDbLock $ do open db = liftIO $ DbOpen <$> H.openDbQueue db SQL.containedTable addAssociatedFile :: Key -> TopFilePath -> Annex () -addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile (toSKey k) f +addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile (toIKey k) f {- Note that the files returned were once associated with the key, but - some of them may not be any longer. -} getAssociatedFiles :: Key -> Annex [TopFilePath] -getAssociatedFiles = runReaderIO . SQL.getAssociatedFiles . toSKey +getAssociatedFiles = runReaderIO . SQL.getAssociatedFiles . toIKey {- Gets any keys that are on record as having a particular associated file. - (Should be one or none but the database doesn't enforce that.) -} getAssociatedKey :: TopFilePath -> Annex [Key] -getAssociatedKey = map fromSKey <$$> runReaderIO . SQL.getAssociatedKey +getAssociatedKey = map fromIKey <$$> runReaderIO . SQL.getAssociatedKey removeAssociatedFile :: Key -> TopFilePath -> Annex () -removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile (toSKey k) +removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile (toIKey k) {- Find all unlocked associated files. This is expensive, and so normally - the associated files are updated incrementally when changes are noticed. -} @@ -168,7 +168,7 @@ scanAssociatedFiles = whenM (isJust <$> inRepo Git.Branch.current) $ isregfile i = Git.Types.toBlobType (Git.LsTree.mode i) == Just Git.Types.FileBlob add h i k = liftIO $ flip SQL.queueDb h $ void $ insertUnique $ SQL.Associated - (toSKey k) + (toIKey k) (getTopFilePath $ Git.LsTree.file i) {- Stats the files, and stores their InodeCaches. -} @@ -177,12 +177,12 @@ storeInodeCaches k fs = withTSDelta $ \d -> addInodeCaches k . catMaybes =<< liftIO (mapM (`genInodeCache` d) fs) addInodeCaches :: Key -> [InodeCache] -> Annex () -addInodeCaches k is = runWriterIO $ SQL.addInodeCaches (toSKey k) is +addInodeCaches k is = runWriterIO $ SQL.addInodeCaches (toIKey k) is {- A key may have multiple InodeCaches; one for the annex object, and one - for each pointer file that is a copy of it. -} getInodeCaches :: Key -> Annex [InodeCache] -getInodeCaches = runReaderIO . SQL.getInodeCaches . toSKey +getInodeCaches = runReaderIO . SQL.getInodeCaches . toIKey removeInodeCaches :: Key -> Annex () -removeInodeCaches = runWriterIO . SQL.removeInodeCaches . toSKey +removeInodeCaches = runWriterIO . SQL.removeInodeCaches . toIKey diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs index 79230b60db..456b48e462 100644 --- a/Database/Keys/SQL.hs +++ b/Database/Keys/SQL.hs @@ -25,12 +25,12 @@ import Control.Monad share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase| Associated - key SKey + key IKey file FilePath KeyFileIndex key file FileKeyIndex file key Content - key SKey + key IKey cache SInodeCache KeyCacheIndex key cache |] @@ -58,51 +58,51 @@ queueDb a (WriteHandle h) = H.queueDb h checkcommit a now <- getCurrentTime return $ diffUTCTime lastcommittime now > 300 -addAssociatedFile :: SKey -> TopFilePath -> WriteHandle -> IO () -addAssociatedFile sk f = queueDb $ do +addAssociatedFile :: IKey -> TopFilePath -> WriteHandle -> IO () +addAssociatedFile ik f = queueDb $ do -- If the same file was associated with a different key before, -- remove that. delete $ from $ \r -> do - where_ (r ^. AssociatedFile ==. val (getTopFilePath f) &&. not_ (r ^. AssociatedKey ==. val sk)) - void $ insertUnique $ Associated sk (getTopFilePath f) + where_ (r ^. AssociatedFile ==. val (getTopFilePath f) &&. not_ (r ^. AssociatedKey ==. val ik)) + void $ insertUnique $ Associated ik (getTopFilePath f) {- Note that the files returned were once associated with the key, but - some of them may not be any longer. -} -getAssociatedFiles :: SKey -> ReadHandle -> IO [TopFilePath] -getAssociatedFiles sk = readDb $ do +getAssociatedFiles :: IKey -> ReadHandle -> IO [TopFilePath] +getAssociatedFiles ik = readDb $ do l <- select $ from $ \r -> do - where_ (r ^. AssociatedKey ==. val sk) + where_ (r ^. AssociatedKey ==. val ik) return (r ^. AssociatedFile) return $ map (asTopFilePath . unValue) l {- Gets any keys that are on record as having a particular associated file. - (Should be one or none but the database doesn't enforce that.) -} -getAssociatedKey :: TopFilePath -> ReadHandle -> IO [SKey] +getAssociatedKey :: TopFilePath -> ReadHandle -> IO [IKey] getAssociatedKey f = readDb $ do l <- select $ from $ \r -> do where_ (r ^. AssociatedFile ==. val (getTopFilePath f)) return (r ^. AssociatedKey) return $ map unValue l -removeAssociatedFile :: SKey -> TopFilePath -> WriteHandle -> IO () -removeAssociatedFile sk f = queueDb $ +removeAssociatedFile :: IKey -> TopFilePath -> WriteHandle -> IO () +removeAssociatedFile ik f = queueDb $ delete $ from $ \r -> do - where_ (r ^. AssociatedKey ==. val sk &&. r ^. AssociatedFile ==. val (getTopFilePath f)) + where_ (r ^. AssociatedKey ==. val ik &&. r ^. AssociatedFile ==. val (getTopFilePath f)) -addInodeCaches :: SKey -> [InodeCache] -> WriteHandle -> IO () -addInodeCaches sk is = queueDb $ - forM_ is $ \i -> insertUnique $ Content sk (toSInodeCache i) +addInodeCaches :: IKey -> [InodeCache] -> WriteHandle -> IO () +addInodeCaches ik is = queueDb $ + forM_ is $ \i -> insertUnique $ Content ik (toSInodeCache i) {- A key may have multiple InodeCaches; one for the annex object, and one - for each pointer file that is a copy of it. -} -getInodeCaches :: SKey -> ReadHandle -> IO [InodeCache] -getInodeCaches sk = readDb $ do +getInodeCaches :: IKey -> ReadHandle -> IO [InodeCache] +getInodeCaches ik = readDb $ do l <- select $ from $ \r -> do - where_ (r ^. ContentKey ==. val sk) + where_ (r ^. ContentKey ==. val ik) return (r ^. ContentCache) return $ map (fromSInodeCache . unValue) l -removeInodeCaches :: SKey -> WriteHandle -> IO () -removeInodeCaches sk = queueDb $ +removeInodeCaches :: IKey -> WriteHandle -> IO () +removeInodeCaches ik = queueDb $ delete $ from $ \r -> do - where_ (r ^. ContentKey ==. val sk) + where_ (r ^. ContentKey ==. val ik) diff --git a/Database/Types.hs b/Database/Types.hs index 1476a693ae..6667bc343b 100644 --- a/Database/Types.hs +++ b/Database/Types.hs @@ -27,6 +27,24 @@ fromSKey (SKey s) = fromMaybe (error $ "bad serialied Key " ++ s) (file2key s) derivePersistField "SKey" +-- A Key index. More efficient than SKey, but its Read instance does not +-- work when it's used in any kind of complex data structure. +newtype IKey = IKey String + +instance Read IKey where + readsPrec _ s = [(IKey s, "")] + +instance Show IKey where + show (IKey s) = s + +toIKey :: Key -> IKey +toIKey = IKey . key2file + +fromIKey :: IKey -> Key +fromIKey (IKey s) = fromMaybe (error $ "bad serialied Key " ++ s) (file2key s) + +derivePersistField "IKey" + -- A serialized InodeCache newtype SInodeCache = I String deriving (Show, Read)