git-annex/Database/Types.hs
Joey Hess 8bde6101e3
sqlite database for importfeed
importfeed: Use caching database to avoid needing to list urls on every
run, and avoid using too much memory.

Benchmarking in my podcasts repo, importfeed got 1.42 seconds faster,
and memory use dropped from 203000k to 59408k.

Database.ImportFeed is Database.ContentIdentifier with the serial number
filed off. There is a bit of code duplication I would like to avoid,
particularly recordAnnexBranchTree, and getAnnexBranchTree. But these use
the persistent sqlite tables, so despite the code being the same, they
cannot be factored out.

Since this database includes the contentidentifier metadata, it will be
slightly redundant if a sqlite database is ever added for metadata. I
did consider making such a generic database and using it for this. But,
that would then need importfeed to update both the url database and the
metadata database, which is twice as much work diffing the git-annex
branch trees. Or would entangle updating two databases in a complex way.
So instead it seems better to optimise the database that
importfeed needs, and if the metadata database is used by another command,
use a little more disk space and do a little bit of redundant work to
update it.

Sponsored-by: unqueued on Patreon
2023-10-23 16:46:22 -04:00

131 lines
3.4 KiB
Haskell

{- types for SQL databases
-
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TypeSynonymInstances #-}
module Database.Types (
module Database.Types,
Key,
EpochTime,
FileSize,
) where
import Database.Persist.Class hiding (Key)
import Database.Persist.Sql hiding (Key)
import qualified Data.ByteString as S
import qualified Data.Text as T
import qualified Data.Attoparsec.ByteString as A
import System.PosixCompat.Types
import Data.Int
import Data.Text.Read
import Foreign.C.Types
import Key
import Utility.InodeCache
import Utility.FileSize
import Utility.FileSystemEncoding
import Git.Types
import Types.UUID
import Types.Import
-- A Key is persisted in the form produced by serializeKey', and is read
-- back by running keyParser over the stored value. A parse failure is
-- reported as a Left carrying the parser's error message.
instance PersistField Key where
  toPersistValue k = toPersistValue (serializeKey' k)
  fromPersistValue v = do
    raw <- fromPersistValue v
    case A.parseOnly keyParser raw of
      Left err -> Left (T.pack err)
      Right k -> Right k
-- Keys may contain arbitrarily encoded characters, so they are stored in
-- sqlite as a blob, which avoids encoding problems.
instance PersistFieldSql Key where
  sqlType = const SqlBlob
-- An InodeCache is persisted as the string made by showInodeCache.
-- Reading it back goes through readInodeCache; a string it does not
-- accept yields a Left that includes the offending string.
instance PersistField InodeCache where
  toPersistValue ic = toPersistValue (showInodeCache ic)
  fromPersistValue v = do
    s <- fromPersistValue v
    case readInodeCache s of
      Just ic -> Right ic
      Nothing -> Left $ T.pack $ "bad serialized InodeCache "++ s
-- The serialized InodeCache is kept in a string column.
instance PersistFieldSql InodeCache where
  sqlType = const SqlString
-- A UUID is persisted as a ByteString. The type annotations pin the
-- intermediate type, since fromUUID and toUUID are used here only via
-- ByteString.
instance PersistField UUID where
  toPersistValue u = toPersistValue (fromUUID u :: S.ByteString)
  fromPersistValue v =
    fmap toUUID (fromPersistValue v :: Either T.Text S.ByteString)
-- UUIDs are stored in a blob column.
instance PersistFieldSql UUID where
  sqlType = const SqlBlob
-- A ContentIdentifier is persisted as the ByteString it wraps, and is
-- rebuilt by wrapping the stored ByteString again.
instance PersistField ContentIdentifier where
  toPersistValue cid = case cid of
    ContentIdentifier raw -> toPersistValue raw
  fromPersistValue v = do
    raw <- (fromPersistValue v :: Either T.Text S.ByteString)
    return (ContentIdentifier raw)
-- ContentIdentifiers are stored in a blob column.
instance PersistFieldSql ContentIdentifier where
  sqlType = const SqlBlob
-- A serialized bytestring.
newtype SByteString = SByteString S.ByteString
  deriving (Eq, Show)

-- Persisted as the wrapped ByteString; reading re-wraps whatever
-- ByteString the database hands back.
instance PersistField SByteString where
  toPersistValue (SByteString raw) = toPersistValue raw
  fromPersistValue = fmap SByteString . fromPersistValue
-- SByteStrings are stored in a blob column.
instance PersistFieldSql SByteString where
  sqlType = const SqlBlob
-- A serialized git Sha, held as a String.
newtype SSha = SSha String
  deriving (Eq, Show)

-- Converts a Sha to its serialized form by decoding the bytes inside
-- the Ref with decodeBS.
toSSha :: Sha -> SSha
toSSha sha = case sha of
  Ref raw -> SSha $ decodeBS raw

-- Converts a serialized Sha back to a Ref by encoding the String with
-- encodeBS.
fromSSha :: SSha -> Ref
fromSSha ssha = case ssha of
  SSha str -> Ref $ encodeBS str
-- An SSha is persisted as the String it wraps.
instance PersistField SSha where
  toPersistValue (SSha str) = toPersistValue str
  fromPersistValue = fmap SSha . fromPersistValue
-- SShas are stored in a string column.
instance PersistFieldSql SSha where
  sqlType = const SqlString
-- An Int64 would be enough to hold a FileSize on most systems, but some
-- could conceivably have larger files, and sqlite never does any math on
-- these values, so a string is stored instead.
--
-- When reading, the value is parsed with decimal; only the parsed number
-- is kept, and the unparsed remainder that decimal returns is dropped.
instance PersistField FileSize where
  toPersistValue sz = toPersistValue (show sz)
  fromPersistValue v = do
    t <- fromPersistValue v
    case decimal t of
      Left err -> Left (T.pack err)
      Right (sz, _rest) -> Right sz
-- FileSizes are stored in a string column.
instance PersistFieldSql FileSize where
  sqlType = const SqlString
-- An EpochTime is stored as an Int64, which allows selecting values
-- that fall in a range.
instance PersistField EpochTime where
  toPersistValue (CTime secs) = toPersistValue asInt64
    where
      asInt64 :: Int64
      asInt64 = fromIntegral secs
  fromPersistValue v = do
    secs <- (fromPersistValue v :: Either T.Text Int64)
    return (CTime (fromIntegral secs))
-- EpochTimes are stored in a 64 bit integer column.
instance PersistFieldSql EpochTime where
  sqlType = const SqlInt64