improve data types for sqlite
This is a non-backwards compatable change, so not suitable for merging w/o a annex.version bump and transition code. Not yet tested. This improves performance of git-annex benchmark --databases across the board by 10-25%, since eg Key roundtrips as a ByteString. (serializeKey' produces a lazy ByteString, so there is still a copy involved in converting it to a strict ByteString. It may be faster to switch to using bytestring-strict-builder.) FilePath and Key are both stored as blobs. This avoids mojibake in some situations. It would be possible to use varchar instead, if persistent could avoid converting that to Text, but it seems there is no good way to do so. See doc/todo/sqlite_database_improvements.mdwn Eliminated some ugly artifacts of using Read/Show serialization; constructors and quoted strings are no longer stored in sqlite. Renamed SRef to SSha to reflect that it is only ever a git sha, not a ref name. Since it is limited to the characters in a sha, it is not affected by mojibake, so still uses String.
This commit is contained in:
parent
e1b21a0491
commit
c35a9047d3
9 changed files with 135 additions and 189 deletions
|
@ -5,124 +5,50 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE TemplateHaskell #-}
|
||||
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
||||
|
||||
module Database.Types where
|
||||
module Database.Types (
|
||||
module Database.Types,
|
||||
Key,
|
||||
) where
|
||||
|
||||
import Database.Persist.TH
|
||||
import Database.Persist.Class hiding (Key)
|
||||
import Database.Persist.Sql hiding (Key)
|
||||
import Data.Maybe
|
||||
import Data.Char
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.Text as T
|
||||
import Control.DeepSeq
|
||||
import qualified Data.Attoparsec.ByteString as A
|
||||
|
||||
import Utility.PartialPrelude
|
||||
import Key
|
||||
import Utility.InodeCache
|
||||
import Git.Types (Ref(..))
|
||||
import Utility.FileSystemEncoding
|
||||
import Git.Types
|
||||
import Types.UUID
|
||||
import Types.Import
|
||||
|
||||
-- A serialized Key
|
||||
newtype SKey = SKey String
|
||||
deriving (Show, Read)
|
||||
instance PersistField Key where
|
||||
toPersistValue = toPersistValue . L.toStrict . serializeKey'
|
||||
fromPersistValue b = fromPersistValue b >>= parse
|
||||
where
|
||||
parse = either (Left . T.pack) Right . A.parseOnly keyParser
|
||||
|
||||
toSKey :: Key -> SKey
|
||||
toSKey = SKey . serializeKey
|
||||
-- A key can contain arbitrarily encoded characters, so store in sqlite as a
|
||||
-- blob to avoid encoding problems.
|
||||
instance PersistFieldSql Key where
|
||||
sqlType _ = SqlBlob
|
||||
|
||||
fromSKey :: SKey -> Key
|
||||
fromSKey (SKey s) = fromMaybe (error $ "bad serialized Key " ++ s) (deserializeKey s)
|
||||
instance PersistField InodeCache where
|
||||
toPersistValue = toPersistValue . showInodeCache
|
||||
fromPersistValue b = fromPersistValue b >>= parse
|
||||
where
|
||||
parse s = maybe
|
||||
(Left $ T.pack $ "bad serialized InodeCache "++ s)
|
||||
Right
|
||||
(readInodeCache s)
|
||||
|
||||
derivePersistField "SKey"
|
||||
|
||||
-- A Key index. More efficient than SKey, but its Read instance does not
|
||||
-- work when it's used in any kind of complex data structure.
|
||||
newtype IKey = IKey String
|
||||
|
||||
instance NFData IKey where
|
||||
rnf (IKey s) = rnf s
|
||||
|
||||
instance Read IKey where
|
||||
readsPrec _ s = [(IKey s, "")]
|
||||
|
||||
instance Show IKey where
|
||||
show (IKey s) = s
|
||||
|
||||
toIKey :: Key -> IKey
|
||||
toIKey = IKey . serializeKey
|
||||
|
||||
fromIKey :: IKey -> Key
|
||||
fromIKey (IKey s) = fromMaybe (error $ "bad serialized Key " ++ s) (deserializeKey s)
|
||||
|
||||
derivePersistField "IKey"
|
||||
|
||||
-- A serialized InodeCache
|
||||
newtype SInodeCache = I String
|
||||
deriving (Show, Read)
|
||||
|
||||
toSInodeCache :: InodeCache -> SInodeCache
|
||||
toSInodeCache = I . showInodeCache
|
||||
|
||||
fromSInodeCache :: SInodeCache -> InodeCache
|
||||
fromSInodeCache (I s) = fromMaybe (error $ "bad serialized InodeCache " ++ s) (readInodeCache s)
|
||||
|
||||
derivePersistField "SInodeCache"
|
||||
|
||||
-- A serialized FilePath.
|
||||
--
|
||||
-- Not all unicode characters round-trip through sqlite. In particular,
|
||||
-- surrigate code points do not. So, escape the FilePath. But, only when
|
||||
-- it contains such characters.
|
||||
newtype SFilePath = SFilePath String
|
||||
|
||||
-- Note that Read instance does not work when used in any kind of complex
|
||||
-- data structure.
|
||||
instance Read SFilePath where
|
||||
readsPrec _ s = [(SFilePath s, "")]
|
||||
|
||||
instance Show SFilePath where
|
||||
show (SFilePath s) = s
|
||||
|
||||
toSFilePath :: FilePath -> SFilePath
|
||||
toSFilePath s@('"':_) = SFilePath (show s)
|
||||
toSFilePath s
|
||||
| any needsescape s = SFilePath (show s)
|
||||
| otherwise = SFilePath s
|
||||
where
|
||||
needsescape c = case generalCategory c of
|
||||
Surrogate -> True
|
||||
PrivateUse -> True
|
||||
NotAssigned -> True
|
||||
_ -> False
|
||||
|
||||
fromSFilePath :: SFilePath -> FilePath
|
||||
fromSFilePath (SFilePath s@('"':_)) =
|
||||
fromMaybe (error "bad serialized SFilePath " ++ s) (readish s)
|
||||
fromSFilePath (SFilePath s) = s
|
||||
|
||||
derivePersistField "SFilePath"
|
||||
|
||||
-- A serialized Ref
|
||||
newtype SRef = SRef Ref
|
||||
|
||||
-- Note that Read instance does not work when used in any kind of complex
|
||||
-- data structure.
|
||||
instance Read SRef where
|
||||
readsPrec _ s = [(SRef (Ref s), "")]
|
||||
|
||||
instance Show SRef where
|
||||
show (SRef (Ref s)) = s
|
||||
|
||||
derivePersistField "SRef"
|
||||
|
||||
toSRef :: Ref -> SRef
|
||||
toSRef = SRef
|
||||
|
||||
fromSRef :: SRef -> Ref
|
||||
fromSRef (SRef r) = r
|
||||
instance PersistFieldSql InodeCache where
|
||||
sqlType _ = SqlString
|
||||
|
||||
instance PersistField UUID where
|
||||
toPersistValue u = toPersistValue b
|
||||
|
@ -146,3 +72,37 @@ instance PersistField ContentIdentifier where
|
|||
|
||||
instance PersistFieldSql ContentIdentifier where
|
||||
sqlType _ = SqlBlob
|
||||
|
||||
-- A serialized FilePath. Stored as a ByteString to avoid encoding problems.
|
||||
newtype SFilePath = SFilePath S.ByteString
|
||||
deriving (Eq, Show)
|
||||
|
||||
toSFilePath :: FilePath -> SFilePath
|
||||
toSFilePath = SFilePath . encodeBS
|
||||
|
||||
fromSFilePath :: SFilePath -> FilePath
|
||||
fromSFilePath (SFilePath b) = decodeBS b
|
||||
|
||||
instance PersistField SFilePath where
|
||||
toPersistValue (SFilePath b) = toPersistValue b
|
||||
fromPersistValue v = SFilePath <$> fromPersistValue v
|
||||
|
||||
instance PersistFieldSql SFilePath where
|
||||
sqlType _ = SqlBlob
|
||||
|
||||
-- A serialized git Sha
|
||||
newtype SSha = SSha String
|
||||
deriving (Eq, Show)
|
||||
|
||||
toSSha :: Sha -> SSha
|
||||
toSSha (Ref s) = SSha s
|
||||
|
||||
fromSSha :: SSha -> Ref
|
||||
fromSSha (SSha s) = Ref s
|
||||
|
||||
instance PersistField SSha where
|
||||
toPersistValue (SSha b) = toPersistValue b
|
||||
fromPersistValue v = SSha <$> fromPersistValue v
|
||||
|
||||
instance PersistFieldSql SSha where
|
||||
sqlType _ = SqlString
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue