git-annex/Database/Keys/SQL.hs
Joey Hess 423fffcd41
change keys database to use IKey type with more efficient serialization
This breaks any existing keys database!

IKey serializes more efficiently than SKey, although this limits the
use of its Read/Show instances.

This makes the keys database use less disk space, and so should be a win.

Updated benchmark:

benchmarking keys database/getAssociatedFiles from 1000 (hit)
time                 64.04 μs   (63.95 μs .. 64.13 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 64.02 μs   (63.96 μs .. 64.08 μs)
std dev              218.2 ns   (172.5 ns .. 299.3 ns)

benchmarking keys database/getAssociatedFiles from 1000 (miss)
time                 52.53 μs   (52.18 μs .. 53.21 μs)
                     0.999 R²   (0.998 R² .. 1.000 R²)
mean                 52.31 μs   (52.18 μs .. 52.91 μs)
std dev              734.6 ns   (206.2 ns .. 1.623 μs)

benchmarking keys database/getAssociatedKey from 1000 (hit)
time                 64.60 μs   (64.46 μs .. 64.77 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 64.74 μs   (64.57 μs .. 65.20 μs)
std dev              900.2 ns   (389.7 ns .. 1.733 μs)

benchmarking keys database/getAssociatedKey from 1000 (miss)
time                 52.46 μs   (52.29 μs .. 52.68 μs)
                     1.000 R²   (0.999 R² .. 1.000 R²)
mean                 52.63 μs   (52.35 μs .. 53.37 μs)
std dev              1.362 μs   (562.7 ns .. 2.608 μs)
variance introduced by outliers: 24% (moderately inflated)

benchmarking keys database/addAssociatedFile to 1000 (old)
time                 487.3 μs   (484.7 μs .. 490.1 μs)
                     1.000 R²   (0.999 R² .. 1.000 R²)
mean                 490.9 μs   (487.8 μs .. 496.5 μs)
std dev              13.95 μs   (6.841 μs .. 22.03 μs)
variance introduced by outliers: 20% (moderately inflated)

benchmarking keys database/addAssociatedFile to 1000 (new)
time                 6.633 ms   (5.741 ms .. 7.751 ms)
                     0.905 R²   (0.850 R² .. 0.965 R²)
mean                 8.252 ms   (7.803 ms .. 8.602 ms)
std dev              1.126 ms   (900.3 μs .. 1.430 ms)
variance introduced by outliers: 72% (severely inflated)

benchmarking keys database/getAssociatedFiles from 10000 (hit)
time                 65.36 μs   (64.71 μs .. 66.37 μs)
                     0.998 R²   (0.995 R² .. 1.000 R²)
mean                 65.28 μs   (64.72 μs .. 66.45 μs)
std dev              2.576 μs   (920.8 ns .. 4.122 μs)
variance introduced by outliers: 42% (moderately inflated)

benchmarking keys database/getAssociatedFiles from 10000 (miss)
time                 52.34 μs   (52.25 μs .. 52.45 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 52.49 μs   (52.42 μs .. 52.59 μs)
std dev              255.4 ns   (205.8 ns .. 312.9 ns)

benchmarking keys database/getAssociatedKey from 10000 (hit)
time                 64.76 μs   (64.67 μs .. 64.84 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 64.67 μs   (64.62 μs .. 64.72 μs)
std dev              177.3 ns   (148.1 ns .. 217.1 ns)

benchmarking keys database/getAssociatedKey from 10000 (miss)
time                 52.75 μs   (52.66 μs .. 52.82 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 52.69 μs   (52.63 μs .. 52.75 μs)
std dev              210.6 ns   (173.7 ns .. 265.9 ns)

benchmarking keys database/addAssociatedFile to 10000 (old)
time                 489.7 μs   (488.7 μs .. 490.7 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 490.4 μs   (489.6 μs .. 492.2 μs)
std dev              3.990 μs   (2.435 μs .. 7.604 μs)

benchmarking keys database/addAssociatedFile to 10000 (new)
time                 9.994 ms   (9.186 ms .. 10.74 ms)
                     0.959 R²   (0.928 R² .. 0.979 R²)
mean                 9.906 ms   (9.343 ms .. 10.40 ms)
std dev              1.384 ms   (1.051 ms .. 2.100 ms)
variance introduced by outliers: 69% (severely inflated)
2016-01-12 14:01:50 -04:00

108 lines
3.5 KiB
Haskell
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{- Sqlite database of information about Keys
-
- Copyright 2015-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes, ScopedTypeVariables #-}
module Database.Keys.SQL where
import Database.Types
import Database.Handle
import qualified Database.Queue as H
import Utility.InodeCache
import Git.FilePath
import Database.Persist.TH
import Database.Esqueleto hiding (Key)
import Data.Time.Clock
import Control.Monad
share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
Associated
key IKey
file FilePath
KeyFileIndex key file
FileKeyIndex file key
Content
key IKey
cache SInodeCache
KeyCacheIndex key cache
|]
containedTable :: TableName
containedTable = "content"
createTables :: SqlPersistM ()
createTables = void $ runMigrationSilent migrateKeysDb
newtype ReadHandle = ReadHandle H.DbQueue
readDb :: SqlPersistM a -> ReadHandle -> IO a
readDb a (ReadHandle h) = H.queryDbQueue h a
newtype WriteHandle = WriteHandle H.DbQueue
queueDb :: SqlPersistM () -> WriteHandle -> IO ()
queueDb a (WriteHandle h) = H.queueDb h checkcommit a
where
-- commit queue after 1000 changes or 5 minutes, whichever comes first
checkcommit sz lastcommittime
| sz > 1000 = return True
| otherwise = do
now <- getCurrentTime
return $ diffUTCTime lastcommittime now > 300
addAssociatedFile :: IKey -> TopFilePath -> WriteHandle -> IO ()
addAssociatedFile ik f = queueDb $ do
-- If the same file was associated with a different key before,
-- remove that.
delete $ from $ \r -> do
where_ (r ^. AssociatedFile ==. val (getTopFilePath f) &&. not_ (r ^. AssociatedKey ==. val ik))
void $ insertUnique $ Associated ik (getTopFilePath f)
{- Note that the files returned were once associated with the key, but
- some of them may not be any longer. -}
getAssociatedFiles :: IKey -> ReadHandle -> IO [TopFilePath]
getAssociatedFiles ik = readDb $ do
l <- select $ from $ \r -> do
where_ (r ^. AssociatedKey ==. val ik)
return (r ^. AssociatedFile)
return $ map (asTopFilePath . unValue) l
{- Gets any keys that are on record as having a particular associated file.
- (Should be one or none but the database doesn't enforce that.) -}
getAssociatedKey :: TopFilePath -> ReadHandle -> IO [IKey]
getAssociatedKey f = readDb $ do
l <- select $ from $ \r -> do
where_ (r ^. AssociatedFile ==. val (getTopFilePath f))
return (r ^. AssociatedKey)
return $ map unValue l
removeAssociatedFile :: IKey -> TopFilePath -> WriteHandle -> IO ()
removeAssociatedFile ik f = queueDb $
delete $ from $ \r -> do
where_ (r ^. AssociatedKey ==. val ik &&. r ^. AssociatedFile ==. val (getTopFilePath f))
addInodeCaches :: IKey -> [InodeCache] -> WriteHandle -> IO ()
addInodeCaches ik is = queueDb $
forM_ is $ \i -> insertUnique $ Content ik (toSInodeCache i)
{- A key may have multiple InodeCaches; one for the annex object, and one
- for each pointer file that is a copy of it. -}
getInodeCaches :: IKey -> ReadHandle -> IO [InodeCache]
getInodeCaches ik = readDb $ do
l <- select $ from $ \r -> do
where_ (r ^. ContentKey ==. val ik)
return (r ^. ContentCache)
return $ map (fromSInodeCache. unValue) l
removeInodeCaches :: IKey -> WriteHandle -> IO ()
removeInodeCaches ik = queueDb $
delete $ from $ \r -> do
where_ (r ^. ContentKey ==. val ik)