git-annex/Database/Types.hs

149 lines
3.5 KiB
Haskell
Raw Normal View History

{- types for SQL databases
-
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Database.Types where
import Database.Persist.TH
import Database.Persist.Class hiding (Key)
import Database.Persist.Sql hiding (Key)
import Data.Maybe
import Data.Char
import qualified Data.ByteString as S
import qualified Data.Text as T
import Control.DeepSeq
import Utility.PartialPrelude
2017-02-24 17:42:30 +00:00
import Key
import Utility.InodeCache
import Git.Types (Ref(..))
import Types.UUID
import Types.Import
-- A serialized Key
newtype SKey = SKey String
deriving (Show, Read)
toSKey :: Key -> SKey
toSKey = SKey . serializeKey
fromSKey :: SKey -> Key
fromSKey (SKey s) = fromMaybe (error $ "bad serialized Key " ++ s) (deserializeKey s)
derivePersistField "SKey"
change keys database to use IKey type with more efficient serialization This breaks any existing keys database! IKey serializes more efficiently than SKey, although this limits the use of its Read/Show instances. This makes the keys database use less disk space, and so should be a win. Updated benchmark: benchmarking keys database/getAssociatedFiles from 1000 (hit) time 64.04 μs (63.95 μs .. 64.13 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.02 μs (63.96 μs .. 64.08 μs) std dev 218.2 ns (172.5 ns .. 299.3 ns) benchmarking keys database/getAssociatedFiles from 1000 (miss) time 52.53 μs (52.18 μs .. 53.21 μs) 0.999 R² (0.998 R² .. 1.000 R²) mean 52.31 μs (52.18 μs .. 52.91 μs) std dev 734.6 ns (206.2 ns .. 1.623 μs) benchmarking keys database/getAssociatedKey from 1000 (hit) time 64.60 μs (64.46 μs .. 64.77 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.74 μs (64.57 μs .. 65.20 μs) std dev 900.2 ns (389.7 ns .. 1.733 μs) benchmarking keys database/getAssociatedKey from 1000 (miss) time 52.46 μs (52.29 μs .. 52.68 μs) 1.000 R² (0.999 R² .. 1.000 R²) mean 52.63 μs (52.35 μs .. 53.37 μs) std dev 1.362 μs (562.7 ns .. 2.608 μs) variance introduced by outliers: 24% (moderately inflated) benchmarking keys database/addAssociatedFile to 1000 (old) time 487.3 μs (484.7 μs .. 490.1 μs) 1.000 R² (0.999 R² .. 1.000 R²) mean 490.9 μs (487.8 μs .. 496.5 μs) std dev 13.95 μs (6.841 μs .. 22.03 μs) variance introduced by outliers: 20% (moderately inflated) benchmarking keys database/addAssociatedFile to 1000 (new) time 6.633 ms (5.741 ms .. 7.751 ms) 0.905 R² (0.850 R² .. 0.965 R²) mean 8.252 ms (7.803 ms .. 8.602 ms) std dev 1.126 ms (900.3 μs .. 1.430 ms) variance introduced by outliers: 72% (severely inflated) benchmarking keys database/getAssociatedFiles from 10000 (hit) time 65.36 μs (64.71 μs .. 66.37 μs) 0.998 R² (0.995 R² .. 1.000 R²) mean 65.28 μs (64.72 μs .. 66.45 μs) std dev 2.576 μs (920.8 ns .. 4.122 μs) variance introduced by outliers: 42% (moderately inflated) benchmarking keys database/getAssociatedFiles from 10000 (miss) time 52.34 μs (52.25 μs .. 52.45 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 52.49 μs (52.42 μs .. 52.59 μs) std dev 255.4 ns (205.8 ns .. 312.9 ns) benchmarking keys database/getAssociatedKey from 10000 (hit) time 64.76 μs (64.67 μs .. 64.84 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.67 μs (64.62 μs .. 64.72 μs) std dev 177.3 ns (148.1 ns .. 217.1 ns) benchmarking keys database/getAssociatedKey from 10000 (miss) time 52.75 μs (52.66 μs .. 52.82 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 52.69 μs (52.63 μs .. 52.75 μs) std dev 210.6 ns (173.7 ns .. 265.9 ns) benchmarking keys database/addAssociatedFile to 10000 (old) time 489.7 μs (488.7 μs .. 490.7 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 490.4 μs (489.6 μs .. 492.2 μs) std dev 3.990 μs (2.435 μs .. 7.604 μs) benchmarking keys database/addAssociatedFile to 10000 (new) time 9.994 ms (9.186 ms .. 10.74 ms) 0.959 R² (0.928 R² .. 0.979 R²) mean 9.906 ms (9.343 ms .. 10.40 ms) std dev 1.384 ms (1.051 ms .. 2.100 ms) variance introduced by outliers: 69% (severely inflated)
2016-01-12 18:01:50 +00:00
-- A Key index. More efficient than SKey, but its Read instance does not
-- work when it's used in any kind of complex data structure.
newtype IKey = IKey String
instance NFData IKey where
rnf (IKey s) = rnf s
change keys database to use IKey type with more efficient serialization This breaks any existing keys database! IKey serializes more efficiently than SKey, although this limits the use of its Read/Show instances. This makes the keys database use less disk space, and so should be a win. Updated benchmark: benchmarking keys database/getAssociatedFiles from 1000 (hit) time 64.04 μs (63.95 μs .. 64.13 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.02 μs (63.96 μs .. 64.08 μs) std dev 218.2 ns (172.5 ns .. 299.3 ns) benchmarking keys database/getAssociatedFiles from 1000 (miss) time 52.53 μs (52.18 μs .. 53.21 μs) 0.999 R² (0.998 R² .. 1.000 R²) mean 52.31 μs (52.18 μs .. 52.91 μs) std dev 734.6 ns (206.2 ns .. 1.623 μs) benchmarking keys database/getAssociatedKey from 1000 (hit) time 64.60 μs (64.46 μs .. 64.77 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.74 μs (64.57 μs .. 65.20 μs) std dev 900.2 ns (389.7 ns .. 1.733 μs) benchmarking keys database/getAssociatedKey from 1000 (miss) time 52.46 μs (52.29 μs .. 52.68 μs) 1.000 R² (0.999 R² .. 1.000 R²) mean 52.63 μs (52.35 μs .. 53.37 μs) std dev 1.362 μs (562.7 ns .. 2.608 μs) variance introduced by outliers: 24% (moderately inflated) benchmarking keys database/addAssociatedFile to 1000 (old) time 487.3 μs (484.7 μs .. 490.1 μs) 1.000 R² (0.999 R² .. 1.000 R²) mean 490.9 μs (487.8 μs .. 496.5 μs) std dev 13.95 μs (6.841 μs .. 22.03 μs) variance introduced by outliers: 20% (moderately inflated) benchmarking keys database/addAssociatedFile to 1000 (new) time 6.633 ms (5.741 ms .. 7.751 ms) 0.905 R² (0.850 R² .. 0.965 R²) mean 8.252 ms (7.803 ms .. 8.602 ms) std dev 1.126 ms (900.3 μs .. 1.430 ms) variance introduced by outliers: 72% (severely inflated) benchmarking keys database/getAssociatedFiles from 10000 (hit) time 65.36 μs (64.71 μs .. 66.37 μs) 0.998 R² (0.995 R² .. 1.000 R²) mean 65.28 μs (64.72 μs .. 66.45 μs) std dev 2.576 μs (920.8 ns .. 4.122 μs) variance introduced by outliers: 42% (moderately inflated) benchmarking keys database/getAssociatedFiles from 10000 (miss) time 52.34 μs (52.25 μs .. 52.45 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 52.49 μs (52.42 μs .. 52.59 μs) std dev 255.4 ns (205.8 ns .. 312.9 ns) benchmarking keys database/getAssociatedKey from 10000 (hit) time 64.76 μs (64.67 μs .. 64.84 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.67 μs (64.62 μs .. 64.72 μs) std dev 177.3 ns (148.1 ns .. 217.1 ns) benchmarking keys database/getAssociatedKey from 10000 (miss) time 52.75 μs (52.66 μs .. 52.82 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 52.69 μs (52.63 μs .. 52.75 μs) std dev 210.6 ns (173.7 ns .. 265.9 ns) benchmarking keys database/addAssociatedFile to 10000 (old) time 489.7 μs (488.7 μs .. 490.7 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 490.4 μs (489.6 μs .. 492.2 μs) std dev 3.990 μs (2.435 μs .. 7.604 μs) benchmarking keys database/addAssociatedFile to 10000 (new) time 9.994 ms (9.186 ms .. 10.74 ms) 0.959 R² (0.928 R² .. 0.979 R²) mean 9.906 ms (9.343 ms .. 10.40 ms) std dev 1.384 ms (1.051 ms .. 2.100 ms) variance introduced by outliers: 69% (severely inflated)
2016-01-12 18:01:50 +00:00
instance Read IKey where
readsPrec _ s = [(IKey s, "")]
instance Show IKey where
show (IKey s) = s
toIKey :: Key -> IKey
toIKey = IKey . serializeKey
change keys database to use IKey type with more efficient serialization This breaks any existing keys database! IKey serializes more efficiently than SKey, although this limits the use of its Read/Show instances. This makes the keys database use less disk space, and so should be a win. Updated benchmark: benchmarking keys database/getAssociatedFiles from 1000 (hit) time 64.04 μs (63.95 μs .. 64.13 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.02 μs (63.96 μs .. 64.08 μs) std dev 218.2 ns (172.5 ns .. 299.3 ns) benchmarking keys database/getAssociatedFiles from 1000 (miss) time 52.53 μs (52.18 μs .. 53.21 μs) 0.999 R² (0.998 R² .. 1.000 R²) mean 52.31 μs (52.18 μs .. 52.91 μs) std dev 734.6 ns (206.2 ns .. 1.623 μs) benchmarking keys database/getAssociatedKey from 1000 (hit) time 64.60 μs (64.46 μs .. 64.77 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.74 μs (64.57 μs .. 65.20 μs) std dev 900.2 ns (389.7 ns .. 1.733 μs) benchmarking keys database/getAssociatedKey from 1000 (miss) time 52.46 μs (52.29 μs .. 52.68 μs) 1.000 R² (0.999 R² .. 1.000 R²) mean 52.63 μs (52.35 μs .. 53.37 μs) std dev 1.362 μs (562.7 ns .. 2.608 μs) variance introduced by outliers: 24% (moderately inflated) benchmarking keys database/addAssociatedFile to 1000 (old) time 487.3 μs (484.7 μs .. 490.1 μs) 1.000 R² (0.999 R² .. 1.000 R²) mean 490.9 μs (487.8 μs .. 496.5 μs) std dev 13.95 μs (6.841 μs .. 22.03 μs) variance introduced by outliers: 20% (moderately inflated) benchmarking keys database/addAssociatedFile to 1000 (new) time 6.633 ms (5.741 ms .. 7.751 ms) 0.905 R² (0.850 R² .. 0.965 R²) mean 8.252 ms (7.803 ms .. 8.602 ms) std dev 1.126 ms (900.3 μs .. 1.430 ms) variance introduced by outliers: 72% (severely inflated) benchmarking keys database/getAssociatedFiles from 10000 (hit) time 65.36 μs (64.71 μs .. 66.37 μs) 0.998 R² (0.995 R² .. 1.000 R²) mean 65.28 μs (64.72 μs .. 66.45 μs) std dev 2.576 μs (920.8 ns .. 4.122 μs) variance introduced by outliers: 42% (moderately inflated) benchmarking keys database/getAssociatedFiles from 10000 (miss) time 52.34 μs (52.25 μs .. 52.45 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 52.49 μs (52.42 μs .. 52.59 μs) std dev 255.4 ns (205.8 ns .. 312.9 ns) benchmarking keys database/getAssociatedKey from 10000 (hit) time 64.76 μs (64.67 μs .. 64.84 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.67 μs (64.62 μs .. 64.72 μs) std dev 177.3 ns (148.1 ns .. 217.1 ns) benchmarking keys database/getAssociatedKey from 10000 (miss) time 52.75 μs (52.66 μs .. 52.82 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 52.69 μs (52.63 μs .. 52.75 μs) std dev 210.6 ns (173.7 ns .. 265.9 ns) benchmarking keys database/addAssociatedFile to 10000 (old) time 489.7 μs (488.7 μs .. 490.7 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 490.4 μs (489.6 μs .. 492.2 μs) std dev 3.990 μs (2.435 μs .. 7.604 μs) benchmarking keys database/addAssociatedFile to 10000 (new) time 9.994 ms (9.186 ms .. 10.74 ms) 0.959 R² (0.928 R² .. 0.979 R²) mean 9.906 ms (9.343 ms .. 10.40 ms) std dev 1.384 ms (1.051 ms .. 2.100 ms) variance introduced by outliers: 69% (severely inflated)
2016-01-12 18:01:50 +00:00
fromIKey :: IKey -> Key
fromIKey (IKey s) = fromMaybe (error $ "bad serialized Key " ++ s) (deserializeKey s)
change keys database to use IKey type with more efficient serialization This breaks any existing keys database! IKey serializes more efficiently than SKey, although this limits the use of its Read/Show instances. This makes the keys database use less disk space, and so should be a win. Updated benchmark: benchmarking keys database/getAssociatedFiles from 1000 (hit) time 64.04 μs (63.95 μs .. 64.13 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.02 μs (63.96 μs .. 64.08 μs) std dev 218.2 ns (172.5 ns .. 299.3 ns) benchmarking keys database/getAssociatedFiles from 1000 (miss) time 52.53 μs (52.18 μs .. 53.21 μs) 0.999 R² (0.998 R² .. 1.000 R²) mean 52.31 μs (52.18 μs .. 52.91 μs) std dev 734.6 ns (206.2 ns .. 1.623 μs) benchmarking keys database/getAssociatedKey from 1000 (hit) time 64.60 μs (64.46 μs .. 64.77 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.74 μs (64.57 μs .. 65.20 μs) std dev 900.2 ns (389.7 ns .. 1.733 μs) benchmarking keys database/getAssociatedKey from 1000 (miss) time 52.46 μs (52.29 μs .. 52.68 μs) 1.000 R² (0.999 R² .. 1.000 R²) mean 52.63 μs (52.35 μs .. 53.37 μs) std dev 1.362 μs (562.7 ns .. 2.608 μs) variance introduced by outliers: 24% (moderately inflated) benchmarking keys database/addAssociatedFile to 1000 (old) time 487.3 μs (484.7 μs .. 490.1 μs) 1.000 R² (0.999 R² .. 1.000 R²) mean 490.9 μs (487.8 μs .. 496.5 μs) std dev 13.95 μs (6.841 μs .. 22.03 μs) variance introduced by outliers: 20% (moderately inflated) benchmarking keys database/addAssociatedFile to 1000 (new) time 6.633 ms (5.741 ms .. 7.751 ms) 0.905 R² (0.850 R² .. 0.965 R²) mean 8.252 ms (7.803 ms .. 8.602 ms) std dev 1.126 ms (900.3 μs .. 1.430 ms) variance introduced by outliers: 72% (severely inflated) benchmarking keys database/getAssociatedFiles from 10000 (hit) time 65.36 μs (64.71 μs .. 66.37 μs) 0.998 R² (0.995 R² .. 1.000 R²) mean 65.28 μs (64.72 μs .. 66.45 μs) std dev 2.576 μs (920.8 ns .. 4.122 μs) variance introduced by outliers: 42% (moderately inflated) benchmarking keys database/getAssociatedFiles from 10000 (miss) time 52.34 μs (52.25 μs .. 52.45 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 52.49 μs (52.42 μs .. 52.59 μs) std dev 255.4 ns (205.8 ns .. 312.9 ns) benchmarking keys database/getAssociatedKey from 10000 (hit) time 64.76 μs (64.67 μs .. 64.84 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 64.67 μs (64.62 μs .. 64.72 μs) std dev 177.3 ns (148.1 ns .. 217.1 ns) benchmarking keys database/getAssociatedKey from 10000 (miss) time 52.75 μs (52.66 μs .. 52.82 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 52.69 μs (52.63 μs .. 52.75 μs) std dev 210.6 ns (173.7 ns .. 265.9 ns) benchmarking keys database/addAssociatedFile to 10000 (old) time 489.7 μs (488.7 μs .. 490.7 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 490.4 μs (489.6 μs .. 492.2 μs) std dev 3.990 μs (2.435 μs .. 7.604 μs) benchmarking keys database/addAssociatedFile to 10000 (new) time 9.994 ms (9.186 ms .. 10.74 ms) 0.959 R² (0.928 R² .. 0.979 R²) mean 9.906 ms (9.343 ms .. 10.40 ms) std dev 1.384 ms (1.051 ms .. 2.100 ms) variance introduced by outliers: 69% (severely inflated)
2016-01-12 18:01:50 +00:00
derivePersistField "IKey"
-- A serialized InodeCache
newtype SInodeCache = I String
deriving (Show, Read)
toSInodeCache :: InodeCache -> SInodeCache
toSInodeCache = I . showInodeCache
fromSInodeCache :: SInodeCache -> InodeCache
fromSInodeCache (I s) = fromMaybe (error $ "bad serialized InodeCache " ++ s) (readInodeCache s)
derivePersistField "SInodeCache"
-- A serialized FilePath.
--
-- Not all unicode characters round-trip through sqlite. In particular,
-- surrigate code points do not. So, escape the FilePath. But, only when
-- it contains such characters.
newtype SFilePath = SFilePath String
-- Note that Read instance does not work when used in any kind of complex
-- data structure.
instance Read SFilePath where
readsPrec _ s = [(SFilePath s, "")]
instance Show SFilePath where
show (SFilePath s) = s
toSFilePath :: FilePath -> SFilePath
toSFilePath s@('"':_) = SFilePath (show s)
toSFilePath s
| any needsescape s = SFilePath (show s)
| otherwise = SFilePath s
where
needsescape c = case generalCategory c of
Surrogate -> True
PrivateUse -> True
NotAssigned -> True
_ -> False
fromSFilePath :: SFilePath -> FilePath
fromSFilePath (SFilePath s@('"':_)) =
fromMaybe (error "bad serialized SFilePath " ++ s) (readish s)
fromSFilePath (SFilePath s) = s
derivePersistField "SFilePath"
-- A serialized Ref
newtype SRef = SRef Ref
-- Note that Read instance does not work when used in any kind of complex
-- data structure.
instance Read SRef where
readsPrec _ s = [(SRef (Ref s), "")]
instance Show SRef where
show (SRef (Ref s)) = s
derivePersistField "SRef"
toSRef :: Ref -> SRef
toSRef = SRef
fromSRef :: SRef -> Ref
fromSRef (SRef r) = r
instance PersistField UUID where
toPersistValue u = toPersistValue b
where
b :: S.ByteString
b = fromUUID u
fromPersistValue v = toUUID <$> go
where
go :: Either T.Text S.ByteString
go = fromPersistValue v
instance PersistFieldSql UUID where
sqlType _ = SqlBlob
instance PersistField ContentIdentifier where
toPersistValue (ContentIdentifier b) = toPersistValue b
fromPersistValue v = ContentIdentifier <$> go
where
go :: Either T.Text S.ByteString
go = fromPersistValue v
instance PersistFieldSql ContentIdentifier where
sqlType _ = SqlBlob