From 8bde6101e34a1623a46626af1f203351ad4fabad Mon Sep 17 00:00:00 2001
From: Joey Hess <id@joeyh.name>
Date: Mon, 23 Oct 2023 16:12:26 -0400
Subject: [PATCH] sqlite database for importfeed

importfeed: Use caching database to avoid needing to list urls on
every run, and avoid using too much memory.

Benchmarking in my podcasts repo, importfeed got 1.42 seconds faster,
and memory use dropped from 203000k to 59408k.

Database.ImportFeed is Database.ContentIdentifier with the serial
number filed off. There is a bit of code duplication I would like to
avoid, particularly recordAnnexBranchTree, and getAnnexBranchTree.
But these use the persistent sqlite tables, so despite the code being
the same, they cannot be factored out.

Since this database includes the contentidentifier metadata, it will
be slightly redundant if a sqlite database is ever added for metadata.
I did consider making such a generic database and using it for this.
But, that would then need importfeed to update both the url database
and the metadata database, which is twice as much work diffing the
git-annex branch trees. Or it would entangle updating two databases
in a complex way. So instead it seems better to optimise the database
that importfeed needs, and if the metadata database is used by another
command, use a little more disk space and do a little bit of redundant
work to update it.

Sponsored-by: unqueued on Patreon
---
 Annex/Branch.hs                               |   9 +
 Annex/Locations.hs                            |  11 +
 Annex/MetaData/StandardFields.hs              |   6 +-
 CHANGELOG                                     |   2 +
 Command/ImportFeed.hs                         |  69 ++----
 Database/ContentIdentifier.hs                 |   5 +-
 Database/Export.hs                            |  28 +--
 Database/ImportFeed.hs                        | 211 ++++++++++++++++++
 Database/Keys/SQL.hs                          |  12 +-
 Database/Types.hs                             |  12 +-
 doc/design/caching_database.mdwn              |   1 -
 ...s_more_memory_the_more_urls_there_are.mdwn |   2 +
 git-annex.cabal                               |   1 +
 13 files changed, 287 insertions(+), 82 deletions(-)
 create mode 100644 Database/ImportFeed.hs

diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index 9acfe6935d..f00ff04504 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -36,6 +36,7 @@ module Annex.Branch (
 	withIndex,
 	precache,
 	overBranchFileContents,
+	updatedFromTree,
 ) where
 
 import qualified Data.ByteString as B
@@ -1029,3 +1030,11 @@ overBranchFileContents' select go st = do
 				Nothing -> getnext fs
 				Just v -> Just (v, f, fs)
 
+{- Check if the git-annex branch has been updated from the oldtree.
+ - If so, returns the tuple of the old and new trees. -}
+updatedFromTree :: Git.Sha -> Annex (Maybe (Git.Sha, Git.Sha))
+updatedFromTree oldtree =
+	inRepo (Git.Ref.tree fullname) >>= \case
+		Just currtree | currtree /= oldtree ->
+			return $ Just (oldtree, currtree)
+		_ -> return Nothing
diff --git a/Annex/Locations.hs b/Annex/Locations.hs
index 1f793321a6..375a285b8a 100644
--- a/Annex/Locations.hs
+++ b/Annex/Locations.hs
@@ -65,6 +65,8 @@ module Annex.Locations (
 	gitAnnexImportLog,
 	gitAnnexContentIdentifierDbDir,
 	gitAnnexContentIdentifierLock,
+	gitAnnexImportFeedDbDir,
+	gitAnnexImportFeedDbLock,
 	gitAnnexScheduleState,
 	gitAnnexTransferDir,
 	gitAnnexCredsDir,
@@ -460,6 +462,15 @@ gitAnnexImportLog :: UUID -> Git.Repo -> GitConfig -> RawFilePath
 gitAnnexImportLog u r c =
 	gitAnnexImportDir r c P.</> fromUUID u P.</> "log"
 
+{- Directory containing database used by importfeed. -}
+gitAnnexImportFeedDbDir :: Git.Repo -> GitConfig -> RawFilePath
+gitAnnexImportFeedDbDir r c =
+	fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "importfeed"
+
+{- Lock file for writing to the importfeed database. -}
+gitAnnexImportFeedDbLock :: Git.Repo -> GitConfig -> RawFilePath
+gitAnnexImportFeedDbLock r c = gitAnnexImportFeedDbDir r c <> ".lck"
+
 {- .git/annex/schedulestate is used to store information about when
  - scheduled jobs were last run. -}
 gitAnnexScheduleState :: Git.Repo -> RawFilePath
diff --git a/Annex/MetaData/StandardFields.hs b/Annex/MetaData/StandardFields.hs
index 9cc3ed4c49..061133b41c 100644
--- a/Annex/MetaData/StandardFields.hs
+++ b/Annex/MetaData/StandardFields.hs
@@ -15,7 +15,8 @@ module Annex.MetaData.StandardFields (
 	isDateMetaField,
 	lastChangedField,
 	mkLastChangedField,
-	isLastChangedField
+	isLastChangedField,
+	itemIdField
 ) where
 
 import Types.MetaData
@@ -61,3 +62,6 @@ lastchanged = "lastchanged"
 
 lastchangedSuffix :: T.Text
 lastchangedSuffix = "-lastchanged"
+
+itemIdField :: MetaField
+itemIdField = mkMetaFieldUnchecked "itemid"
diff --git a/CHANGELOG b/CHANGELOG
index fe3f5931c9..b1b6864f3d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,8 @@ git-annex (10.20230927) UNRELEASED; urgency=medium
 
   * Ignore directories and other unusual files in .git/annex/journal/
   * Fix crash of enableremote when the special remote has embedcreds=yes.
+  * importfeed: Use caching database to avoid needing to list urls
+    on every run, and avoid using too much memory.
 
  -- Joey Hess <id@joeyh.name>  Tue, 10 Oct 2023 13:17:31 -0400
 
diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs
index 3122c8f601..98a328bff0 100644
--- a/Command/ImportFeed.hs
+++ b/Command/ImportFeed.hs
@@ -48,9 +48,8 @@ import Logs.MetaData
 import Annex.MetaData
 import Annex.FileMatcher
 import Annex.UntrustedFilePath
-import qualified Annex.Branch
-import Logs
 import qualified Utility.RawFilePath as R
+import qualified Database.ImportFeed as Db
 
 cmd :: Command
 cmd = notBareRepo $ withAnnexOptions os $
@@ -202,53 +201,25 @@ data DownloadLocation = Enclosure URLString | MediaLink URLString
 type ItemId = String
 
 data Cache = Cache
-	{ knownurls :: S.Set URLString
-	, knownitems :: S.Set ItemId
+	{ dbhandle :: Maybe Db.ImportFeedDbHandle
 	, template :: Utility.Format.Format
 	}
 
 getCache :: Maybe String -> Annex Cache
 getCache opttemplate = ifM (Annex.getRead Annex.force)
-	( ret S.empty S.empty
+	( ret Nothing
 	, do
 		j <- jsonOutputEnabled
 		unless j $
 			showStartMessage (StartMessage "importfeed" (ActionItemOther (Just "gathering known urls")) (SeekInput []))
-		(us, is) <- knownItems
+		h <- Db.openDb
 		unless j
 			showEndOk
-		ret (S.fromList us) (S.fromList is)
+		ret (Just h)
 	)
   where
	tmpl = Utility.Format.gen $ fromMaybe defaultTemplate opttemplate
-	ret us is = return $ Cache us is tmpl
-
-{- Scan all url logs and metadata logs in the branch and find urls
- - and ItemIds that are already known. -}
-knownItems :: Annex ([URLString], [ItemId])
-knownItems = Annex.Branch.overBranchFileContents select (go [] []) >>= \case
-	Just r -> return r
-	Nothing -> giveup "This repository is read-only."
-  where
-	select f
-		| isUrlLog f = Just ()
-		| isMetaDataLog f = Just ()
-		| otherwise = Nothing
-
-	go uc ic reader = reader >>= \case
-		Just ((), f, Just content)
-			| isUrlLog f -> case parseUrlLog content of
-				[] -> go uc ic reader
-				us -> go (us++uc) ic reader
-			| isMetaDataLog f ->
-				let s = currentMetaDataValues itemIdField $
-					parseCurrentMetaData content
-				in if S.null s
-					then go uc ic reader
-					else go uc (map (decodeBS . fromMetaValue) (S.toList s)++ic) reader
-			| otherwise -> go uc ic reader
-		Just ((), _, Nothing) -> go uc ic reader
-		Nothing -> return (uc, ic)
+	ret h = return $ Cache h tmpl
 
 findDownloads :: URLString -> Feed -> [ToDownload]
 findDownloads u f = catMaybes $ map mk (feedItems f)
@@ -285,14 +256,20 @@ startDownload addunlockedmatcher opts cache cv todownload = case location todown
 		, downloadmedia linkurl mediaurl mediakey
 		)
   where
-	forced = Annex.getRead Annex.force
-
 	{- Avoids downloading any items that are already known to be
-	 - associated with a file in the annex, unless forced. -}
-	checkknown url a
-		| knownitemid || S.member url (knownurls cache)
-			= ifM forced (a, nothingtodo)
-		| otherwise = a
+	 - associated with a file in the annex. -}
+	checkknown url a = case dbhandle cache of
+		Just db -> ifM (liftIO $ Db.isKnownUrl db url)
+			( nothingtodo
+			, case getItemId (item todownload) of
+				Just (_, itemid) ->
+					ifM (liftIO $ Db.isKnownItemId db (fromFeedText itemid))
+						( nothingtodo
+						, a
+						)
+				_ -> a
+			)
+		Nothing -> a
 
 	nothingtodo = recordsuccess >> stop
 
@@ -302,11 +279,6 @@ startDownload addunlockedmatcher opts cache cv todownload = case location todown
 	startdownloadenclosure url = checkknown url $
 		startUrlDownload cv todownload url $
 			downloadEnclosure addunlockedmatcher opts cache cv todownload url
 
-	knownitemid = case getItemId (item todownload) of
-		Just (_, itemid) ->
-			S.member (decodeBS $ fromFeedText itemid) (knownitems cache)
-		_ -> False
-
 	downloadmedia linkurl mediaurl mediakey
 		| rawOption (downloadOptions opts) = startdownloadlink
 		| otherwise = ifM (youtubeDlSupported linkurl)
@@ -555,9 +527,6 @@ extractFields i = map (uncurry extractField)
 	feedauthor = decodeBS . fromFeedText <$> getFeedAuthor (feed i)
 	itemauthor = decodeBS . fromFeedText <$> getItemAuthor (item i)
 
-itemIdField :: MetaField
-itemIdField = mkMetaFieldUnchecked "itemid"
-
 extractField :: String -> [Maybe String] -> (String, String)
 extractField k [] = (k, noneValue)
 extractField k (Just v:_)
diff --git a/Database/ContentIdentifier.hs b/Database/ContentIdentifier.hs
index e847c00cce..f368c8c227 100644
--- a/Database/ContentIdentifier.hs
+++ b/Database/ContentIdentifier.hs
@@ -162,10 +162,7 @@ getAnnexBranchTree (ContentIdentifierHandle h _) = H.queryDbQueue h $ do
 needsUpdateFromLog :: ContentIdentifierHandle -> Annex (Maybe (Sha, Sha))
 needsUpdateFromLog db = do
 	oldtree <- liftIO $ getAnnexBranchTree db
-	inRepo (Git.Ref.tree Annex.Branch.fullname) >>= \case
-		Just currtree | currtree /= oldtree ->
-			return $ Just (oldtree, currtree)
-		_ -> return Nothing
+	Annex.Branch.updatedFromTree oldtree
 
 {- The database should be locked for write when calling this. -}
 updateFromLog :: ContentIdentifierHandle -> (Sha, Sha) -> Annex ContentIdentifierHandle
diff --git a/Database/Export.hs b/Database/Export.hs
index b9d9879da0..2f0da8b231 100644
--- a/Database/Export.hs
+++ b/Database/Export.hs
@@ -73,18 +73,18 @@ share [mkPersist sqlSettings, mkMigrate "migrateExport"] [persistLowerCase|
 -- Files that have been exported to the remote and are present on it.
 Exported
   key Key
-  file SFilePath
+  file SByteString
   ExportedIndex key file
 -- Directories that exist on the remote, and the files that are in them.
 ExportedDirectory
-  subdir SFilePath
-  file SFilePath
+  subdir SByteString
+  file SByteString
   ExportedDirectoryIndex subdir file
 -- The content of the tree that has been exported to the remote.
 -- Not all of these files are necessarily present on the remote yet.
 ExportTree
   key Key
-  file SFilePath
+  file SByteString
   ExportTreeKeyFileIndex key file
   ExportTreeFileKeyIndex file key
 -- The tree stored in ExportTree
@@ -139,26 +139,26 @@ addExportedLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
 addExportedLocation h k el = queueDb h $ do
 	void $ insertUniqueFast $ Exported k ef
 	let edirs = map
-		(\ed -> ExportedDirectory (SFilePath (fromExportDirectory ed)) ef)
+		(\ed -> ExportedDirectory (SByteString (fromExportDirectory ed)) ef)
 		(exportDirectories el)
 	putMany edirs
   where
-	ef = SFilePath (fromExportLocation el)
+	ef = SByteString (fromExportLocation el)
 
 removeExportedLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
 removeExportedLocation h k el = queueDb h $ do
 	deleteWhere [ExportedKey ==. k, ExportedFile ==. ef]
-	let subdirs = map (SFilePath . fromExportDirectory)
+	let subdirs = map (SByteString . fromExportDirectory)
 		(exportDirectories el)
 	deleteWhere [ExportedDirectoryFile ==. ef, ExportedDirectorySubdir <-. subdirs]
   where
-	ef = SFilePath (fromExportLocation el)
+	ef = SByteString (fromExportLocation el)
 
 {- Note that this does not see recently queued changes. -}
 getExportedLocation :: ExportHandle -> Key -> IO [ExportLocation]
 getExportedLocation (ExportHandle h _) k = H.queryDbQueue h $ do
 	l <- selectList [ExportedKey ==. k] []
-	return $ map (mkExportLocation . (\(SFilePath f) -> f) . exportedFile . entityVal) l
+	return $ map (mkExportLocation . (\(SByteString f) -> f) . exportedFile . entityVal) l
 
 {- Note that this does not see recently queued changes. -}
 isExportDirectoryEmpty :: ExportHandle -> ExportDirectory -> IO Bool
@@ -166,13 +166,13 @@ isExportDirectoryEmpty (ExportHandle h _) d = H.queryDbQueue h $ do
 	l <- selectList [ExportedDirectorySubdir ==. ed] []
 	return $ null l
   where
-	ed = SFilePath $ fromExportDirectory d
+	ed = SByteString $ fromExportDirectory d
 
 {- Get locations in the export that might contain a key. -}
 getExportTree :: ExportHandle -> Key -> IO [ExportLocation]
 getExportTree (ExportHandle h _) k = H.queryDbQueue h $ do
 	l <- selectList [ExportTreeKey ==. k] []
-	return $ map (mkExportLocation . (\(SFilePath f) -> f) . exportTreeFile . entityVal) l
+	return $ map (mkExportLocation . (\(SByteString f) -> f) . exportTreeFile . entityVal) l
 
 {- Get keys that might be currently exported to a location.
  -
@@ -183,19 +183,19 @@ getExportTreeKey (ExportHandle h _) el = H.queryDbQueue h $ do
 	map (exportTreeKey . entityVal)
 		<$> selectList [ExportTreeFile ==. ef] []
   where
-	ef = SFilePath (fromExportLocation el)
+	ef = SByteString (fromExportLocation el)
 
 addExportTree :: ExportHandle -> Key -> ExportLocation -> IO ()
 addExportTree h k loc = queueDb h $
 	void $ insertUniqueFast $ ExportTree k ef
  where
-	ef = SFilePath (fromExportLocation loc)
+	ef = SByteString (fromExportLocation loc)
 
 removeExportTree :: ExportHandle -> Key -> ExportLocation -> IO ()
 removeExportTree h k loc = queueDb h $
 	deleteWhere [ExportTreeKey ==. k, ExportTreeFile ==. ef]
  where
-	ef = SFilePath (fromExportLocation loc)
+	ef = SByteString (fromExportLocation loc)
 
 -- An action that is passed the old and new values that were exported,
 -- and updates state.
diff --git a/Database/ImportFeed.hs b/Database/ImportFeed.hs
new file mode 100644
index 0000000000..69c0599124
--- /dev/null
+++ b/Database/ImportFeed.hs
@@ -0,0 +1,211 @@
+{- Sqlite database of known urls, and another of known itemids,
+ - for use by git-annex importfeed.
+ -
+ - Copyright 2023 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE QuasiQuotes, TypeFamilies, TypeOperators, TemplateHaskell #-}
+{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts, EmptyDataDecls #-}
+{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE DataKinds, FlexibleInstances #-}
+{-# LANGUAGE UndecidableInstances #-}
+#if MIN_VERSION_persistent_template(2,8,0)
+{-# LANGUAGE DerivingStrategies #-}
+{-# LANGUAGE StandaloneDeriving #-}
+#endif
+
+module Database.ImportFeed (
+	ImportFeedDbHandle,
+	openDb,
+	closeDb,
+	isKnownUrl,
+	isKnownItemId,
+) where
+
+import Database.Types
+import qualified Database.Queue as H
+import Database.Init
+import Database.Utility
+import Annex.Locations
+import Annex.Common hiding (delete)
+import qualified Annex.Branch
+import Git.Types
+import Git.Sha
+import Git.FilePath
+import qualified Git.DiffTree as DiffTree
+import Logs
+import Logs.Web
+import Logs.MetaData
+import Types.MetaData
+import Annex.MetaData.StandardFields
+import Annex.LockFile
+import qualified Utility.RawFilePath as R
+
+import Database.Persist.Sql hiding (Key)
+import Database.Persist.TH
+import qualified System.FilePath.ByteString as P
+import qualified Data.ByteString as B
+import qualified Data.Set as S
+
+data ImportFeedDbHandle = ImportFeedDbHandle H.DbQueue
+
+-- Note on indexes: ContentIndentifiersKeyRemoteCidIndex etc are really
+-- uniqueness constraints, which cause sqlite to automatically add indexes.
+-- So when adding indexes, have to take care to only add ones that work as
+-- uniqueness constraints. (Unfortunately persistent does not support indexes
+-- that are not uniqueness constraints;
+-- https://github.com/yesodweb/persistent/issues/109)
+share [mkPersist sqlSettings, mkMigrate "migrateImportFeed"] [persistLowerCase|
+KnownUrls
+  url SByteString
+  UniqueUrl url
+KnownItemIds
+  itemid SByteString
+  UniqueItemId itemid
+-- The last git-annex branch tree sha that was used to update
+-- KnownUrls and KnownItemIds
+AnnexBranch
+  tree SSha
+  UniqueTree tree
+|]
+
+{- Opens the database, creating it if it doesn't exist yet.
+ - Updates the database from the git-annex branch. -}
+openDb :: Annex ImportFeedDbHandle
+openDb = do
+	dbdir <- calcRepo' gitAnnexImportFeedDbDir
+	let db = dbdir P.</> "db"
+	isnew <- liftIO $ not <$> R.doesPathExist db
+	when isnew $
+		initDb db $ void $
+			runMigrationSilent migrateImportFeed
+	dbh <- liftIO $ H.openDbQueue db "known_urls"
+	let h = ImportFeedDbHandle dbh
+	needsUpdateFromLog h >>= \case
+		Nothing -> return ()
+		Just v -> do
+			lck <- calcRepo' gitAnnexImportFeedDbLock
+			withExclusiveLock lck $
+				updateFromLog h v
+	return h
+
+closeDb :: ImportFeedDbHandle -> Annex ()
+closeDb (ImportFeedDbHandle h) = liftIO $ H.closeDbQueue h
+
+isKnownUrl :: ImportFeedDbHandle -> URLString -> IO Bool
+isKnownUrl (ImportFeedDbHandle h) u =
+	H.queryDbQueue h $ do
+		l <- selectList
+			[ KnownUrlsUrl ==. SByteString (encodeBS u)
+			] []
+		return $ not (null l)
+
+isKnownItemId :: ImportFeedDbHandle -> B.ByteString -> IO Bool
+isKnownItemId (ImportFeedDbHandle h) i =
+	H.queryDbQueue h $ do
+		l <- selectList
+			[ KnownItemIdsItemid ==. SByteString i
+			] []
+		return $ not (null l)
+
+recordKnownUrl :: ImportFeedDbHandle -> URLString -> IO ()
+recordKnownUrl h u = queueDb h $
+	void $ insertUniqueFast $ KnownUrls $ SByteString $ encodeBS u
+
+recordKnownItemId :: ImportFeedDbHandle -> SByteString -> IO ()
+recordKnownItemId h i = queueDb h $
+	void $ insertUniqueFast $ KnownItemIds i
+
+recordAnnexBranchTree :: ImportFeedDbHandle -> Sha -> IO ()
+recordAnnexBranchTree h s = queueDb h $ do
+	deleteWhere ([] :: [Filter AnnexBranch])
+	void $ insertUniqueFast $ AnnexBranch $ toSSha s
+
+getAnnexBranchTree :: ImportFeedDbHandle -> IO Sha
+getAnnexBranchTree (ImportFeedDbHandle h) = H.queryDbQueue h $ do
+	l <- selectList ([] :: [Filter AnnexBranch]) []
+	case l of
+		(s:[]) -> return $ fromSSha $ annexBranchTree $ entityVal s
+		_ -> return emptyTree
+
+queueDb :: ImportFeedDbHandle -> SqlPersistM () -> IO ()
+queueDb (ImportFeedDbHandle h) = H.queueDb h checkcommit
+  where
+	-- commit queue after 10000 changes
+	checkcommit sz _lastcommittime
+		| sz > 10000 = return True
+		| otherwise = return False
+
+{- Check if the git-annex branch has been updated and the database needs
+ - to be updated with any new information from it. -}
+needsUpdateFromLog :: ImportFeedDbHandle -> Annex (Maybe (Sha, Sha))
+needsUpdateFromLog db = do
+	oldtree <- liftIO $ getAnnexBranchTree db
+	Annex.Branch.updatedFromTree oldtree
+
+{- The database should be locked for write when calling this. -}
+updateFromLog :: ImportFeedDbHandle -> (Sha, Sha) -> Annex ()
+updateFromLog db@(ImportFeedDbHandle h) (oldtree, currtree)
+	| oldtree == emptyTree = do
+		scanbranch
+		out
+	| otherwise = do
+		scandiff
+		out
+  where
+	out = liftIO $ do
+		recordAnnexBranchTree db currtree
+		H.flushDbQueue h
+
+	knownitemids s = liftIO $ forM_ (S.toList s) $
+		recordKnownItemId db . SByteString . fromMetaValue
+
+	knownurls us = liftIO $ forM_ us $
+		recordKnownUrl db
+
+	scandiff = do
+		(l, cleanup) <- inRepo $
+			DiffTree.diffTreeRecursive oldtree currtree
+		mapM_ godiff l
+		void $ liftIO $ cleanup
+
+	godiff ti = do
+		let f = getTopFilePath (DiffTree.file ti)
+		case extLogFileKey urlLogExt f of
+			Just k -> do
+				knownurls =<< getUrls k
+			Nothing -> case extLogFileKey metaDataLogExt f of
+				Just k -> do
+					m <- getCurrentMetaData k
+					knownitemids (currentMetaDataValues itemIdField m)
+				Nothing -> return ()
+
+	-- When initially populating the database, this
+	-- is faster than diffing from the empty tree
+	-- and looking up every log file.
+	scanbranch = Annex.Branch.overBranchFileContents toscan goscan >>= \case
+		Just () -> return ()
+		Nothing -> scandiff
+
+	toscan f
+		| isUrlLog f = Just ()
+		| isMetaDataLog f = Just ()
+		| otherwise = Nothing
+
+	goscan reader = reader >>= \case
+		Just ((), f, Just content)
+			| isUrlLog f -> do
+				knownurls (parseUrlLog content)
+				goscan reader
+			| isMetaDataLog f -> do
+				knownitemids $
+					currentMetaDataValues itemIdField $
+					parseCurrentMetaData content
+				goscan reader
+			| otherwise -> goscan reader
+		Just ((), _, Nothing) -> goscan reader
+		Nothing -> return ()
diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs
index 7fea5cf2bb..2e40e39db3 100644
--- a/Database/Keys/SQL.hs
+++ b/Database/Keys/SQL.hs
@@ -46,7 +46,7 @@ import Data.Maybe
 share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
 Associated
   key Key
-  file SFilePath
+  file SByteString
   KeyFileIndex key file
   FileKeyIndex file key
 Content
@@ -87,7 +87,7 @@ addAssociatedFile k f = queueDb $
 		(Associated k af)
 		[AssociatedFile =. af, AssociatedKey =. k]
   where
-	af = SFilePath (getTopFilePath f)
+	af = SByteString (getTopFilePath f)
 
 -- Faster than addAssociatedFile, but only safe to use when the file
 -- was not associated with a different key before, as it does not delete
@@ -96,14 +96,14 @@ newAssociatedFile :: Key -> TopFilePath -> WriteHandle -> IO ()
 newAssociatedFile k f = queueDb $
 	insert_ $ Associated k af
  where
-	af = SFilePath (getTopFilePath f)
+	af = SByteString (getTopFilePath f)
 
 {- Note that the files returned were once associated with the key, but
  - some of them may not be any longer. -}
 getAssociatedFiles :: Key -> ReadHandle -> IO [TopFilePath]
 getAssociatedFiles k = readDb $ do
 	l <- selectList [AssociatedKey ==. k] []
-	return $ map (asTopFilePath . (\(SFilePath f) -> f) . associatedFile . entityVal) l
+	return $ map (asTopFilePath . (\(SByteString f) -> f) . associatedFile . entityVal) l
 
 {- Gets any keys that are on record as having a particular associated file.
  - (Should be one or none.) -}
@@ -112,13 +112,13 @@ getAssociatedKey f = readDb $ do
 	l <- selectList [AssociatedFile ==. af] []
 	return $ map (associatedKey . entityVal) l
  where
-	af = SFilePath (getTopFilePath f)
+	af = SByteString (getTopFilePath f)
 
 removeAssociatedFile :: Key -> TopFilePath -> WriteHandle -> IO ()
 removeAssociatedFile k f = queueDb $
 	deleteWhere [AssociatedKey ==. k, AssociatedFile ==. af]
  where
-	af = SFilePath (getTopFilePath f)
+	af = SByteString (getTopFilePath f)
 
 addInodeCaches :: Key -> [InodeCache] -> WriteHandle -> IO ()
 addInodeCaches k is = queueDb $
diff --git a/Database/Types.hs b/Database/Types.hs
index 7b317ce6ac..bd8852852d 100644
--- a/Database/Types.hs
+++ b/Database/Types.hs
@@ -79,15 +79,15 @@ instance PersistField ContentIdentifier where
 instance PersistFieldSql ContentIdentifier where
 	sqlType _ = SqlBlob
 
--- A serialized RawFilePath.
-newtype SFilePath = SFilePath S.ByteString
+-- A serialized bytestring.
+newtype SByteString = SByteString S.ByteString
 	deriving (Eq, Show)
 
-instance PersistField SFilePath where
-	toPersistValue (SFilePath b) = toPersistValue b
-	fromPersistValue v = SFilePath <$> fromPersistValue v
+instance PersistField SByteString where
+	toPersistValue (SByteString b) = toPersistValue b
+	fromPersistValue v = SByteString <$> fromPersistValue v
 
-instance PersistFieldSql SFilePath where
+instance PersistFieldSql SByteString where
 	sqlType _ = SqlBlob
 
 -- A serialized git Sha
diff --git a/doc/design/caching_database.mdwn b/doc/design/caching_database.mdwn
index adc4b774ef..c415aed3e8 100644
--- a/doc/design/caching_database.mdwn
+++ b/doc/design/caching_database.mdwn
@@ -1,6 +1,5 @@
 * [[metadata]] for views
 * [[todo/cache_key_info]]
-* [[todo/importfeed_needs_more_memory_the_more_urls_there_are]]
 
 What do all these have in common? They could all be improved by
 using some kind of database to locally store the information in an
diff --git a/doc/todo/importfeed_needs_more_memory_the_more_urls_there_are.mdwn b/doc/todo/importfeed_needs_more_memory_the_more_urls_there_are.mdwn
index 2515e755c9..eaca6becb1 100644
--- a/doc/todo/importfeed_needs_more_memory_the_more_urls_there_are.mdwn
+++ b/doc/todo/importfeed_needs_more_memory_the_more_urls_there_are.mdwn
@@ -15,3 +15,5 @@ branch is changed. --[[Joey]]
 > significantly slow in large repos. So I think worth doing.
 
 [[!tag confirmed]]
+
+> [[done]] --[[Joey]]
diff --git a/git-annex.cabal b/git-annex.cabal
index 9eccf45ed9..1b71daa5af 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -737,6 +737,7 @@ Executable git-annex
   Database.Export
   Database.Fsck
   Database.Handle
+  Database.ImportFeed
   Database.Init
   Database.Keys
   Database.Keys.Handle
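
A minimal usage sketch of the new Database.ImportFeed API, going only by
the signatures the patch adds (openDb updates the cache from the
git-annex branch before returning). The helper name and exact import
list below are illustrative assumptions, not code from the patch;
Command/ImportFeed.hs instead keeps the handle open in its Cache for the
whole run, so the branch diff is only done once per invocation:

    import Annex.Common
    import Utility.Url (URLString)
    import qualified Database.ImportFeed as Db

    -- Hypothetical helper: open the cache, query one url, close the queue.
    urlAlreadyKnown :: URLString -> Annex Bool
    urlAlreadyKnown u = do
        h <- Db.openDb
        known <- liftIO $ Db.isKnownUrl h u
        Db.closeDb h
        return known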