sqlite database for importfeed
importfeed: Use caching database to avoid needing to list urls on every run, and avoid using too much memory.

Benchmarking in my podcasts repo, importfeed got 1.42 seconds faster, and memory use dropped from 203000k to 59408k.

Database.ImportFeed is Database.ContentIdentifier with the serial number filed off. There is a bit of code duplication I would like to avoid, particularly recordAnnexBranchTree and getAnnexBranchTree. But these use the persistent sqlite tables, so despite the code being the same, they cannot be factored out.

Since this database includes the contentidentifier metadata, it will be slightly redundant if a sqlite database is ever added for metadata. I did consider making such a generic database and using it for this. But that would then need importfeed to update both the url database and the metadata database, which is twice as much work diffing the git-annex branch trees, or it would entangle updating the two databases in a complex way. So instead it seems better to optimise the database that importfeed needs, and if the metadata database is used by another command, use a little more disk space and do a little bit of redundant work to update it.

Sponsored-by: unqueued on Patreon
commit 8bde6101e3
parent df4a60e28d
13 changed files with 287 additions and 82 deletions
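The flow the commit message describes, sketched here as a minimal hypothetical caller rather than code from the commit: importfeed now opens the per-repository sqlite database, asks it whether a url or feed itemid is already known, and closes it when done, instead of building in-memory Sets of every known url by walking the git-annex branch. Db.openDb, Db.isKnownUrl, Db.isKnownItemId, and Db.closeDb are the functions added in Database/ImportFeed.hs below; the helper name skipKnown and its exact wiring are illustrative only.

-- Illustrative sketch only; Command/ImportFeed.hs does the real wiring.
import Annex.Common
import qualified Data.ByteString as B
import qualified Database.ImportFeed as Db
import Utility.Url (URLString) -- URLString is a String synonym

-- True when the url, or the feed item's itemid, is already recorded,
-- so the download can be skipped without listing urls from the branch.
skipKnown :: URLString -> Maybe B.ByteString -> Annex Bool
skipKnown url mitemid = do
	h <- Db.openDb -- also brings the database up to date with the git-annex branch
	knownurl <- liftIO $ Db.isKnownUrl h url
	knownitem <- case mitemid of
		Just itemid -> liftIO $ Db.isKnownItemId h itemid
		Nothing -> return False
	Db.closeDb h
	return (knownurl || knownitem)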
@@ -36,6 +36,7 @@ module Annex.Branch (
 	withIndex,
 	precache,
 	overBranchFileContents,
+	updatedFromTree,
 ) where
 
 import qualified Data.ByteString as B
@@ -1029,3 +1030,11 @@ overBranchFileContents' select go st = do
 			Nothing -> getnext fs
 			Just v -> Just (v, f, fs)
 
+{- Check if the git-annex branch has been updated from the oldtree.
+ - If so, returns the tuple of the old and new trees. -}
+updatedFromTree :: Git.Sha -> Annex (Maybe (Git.Sha, Git.Sha))
+updatedFromTree oldtree =
+	inRepo (Git.Ref.tree fullname) >>= \case
+		Just currtree | currtree /= oldtree ->
+			return $ Just (oldtree, currtree)
+		_ -> return Nothing
@@ -65,6 +65,8 @@ module Annex.Locations (
 	gitAnnexImportLog,
 	gitAnnexContentIdentifierDbDir,
 	gitAnnexContentIdentifierLock,
+	gitAnnexImportFeedDbDir,
+	gitAnnexImportFeedDbLock,
 	gitAnnexScheduleState,
 	gitAnnexTransferDir,
 	gitAnnexCredsDir,
@@ -460,6 +462,15 @@ gitAnnexImportLog :: UUID -> Git.Repo -> GitConfig -> RawFilePath
 gitAnnexImportLog u r c =
 	gitAnnexImportDir r c P.</> fromUUID u P.</> "log"
 
+{- Directory containing database used by importfeed. -}
+gitAnnexImportFeedDbDir :: Git.Repo -> GitConfig -> RawFilePath
+gitAnnexImportFeedDbDir r c =
+	fromMaybe (gitAnnexDir r) (annexDbDir c) P.</> "importfeed"
+
+{- Lock file for writing to the importfeed database. -}
+gitAnnexImportFeedDbLock :: Git.Repo -> GitConfig -> RawFilePath
+gitAnnexImportFeedDbLock r c = gitAnnexImportFeedDbDir r c <> ".lck"
+
 {- .git/annex/schedulestate is used to store information about when
  - scheduled jobs were last run. -}
 gitAnnexScheduleState :: Git.Repo -> RawFilePath
@@ -15,7 +15,8 @@ module Annex.MetaData.StandardFields (
 	isDateMetaField,
 	lastChangedField,
 	mkLastChangedField,
-	isLastChangedField
+	isLastChangedField,
+	itemIdField
 ) where
 
 import Types.MetaData
@@ -61,3 +62,6 @@ lastchanged = "lastchanged"
 
 lastchangedSuffix :: T.Text
 lastchangedSuffix = "-lastchanged"
+
+itemIdField :: MetaField
+itemIdField = mkMetaFieldUnchecked "itemid"
@@ -2,6 +2,8 @@ git-annex (10.20230927) UNRELEASED; urgency=medium
 
   * Ignore directories and other unusual files in .git/annex/journal/
   * Fix crash of enableremote when the special remote has embedcreds=yes.
+  * importfeed: Use caching database to avoid needing to list urls
+    on every run, and avoid using too much memory.
 
  -- Joey Hess <id@joeyh.name>  Tue, 10 Oct 2023 13:17:31 -0400
 
@@ -48,9 +48,8 @@ import Logs.MetaData
 import Annex.MetaData
 import Annex.FileMatcher
 import Annex.UntrustedFilePath
-import qualified Annex.Branch
-import Logs
 import qualified Utility.RawFilePath as R
+import qualified Database.ImportFeed as Db
 
 cmd :: Command
 cmd = notBareRepo $ withAnnexOptions os $
@@ -202,53 +201,25 @@ data DownloadLocation = Enclosure URLString | MediaLink URLString
 type ItemId = String
 
 data Cache = Cache
-	{ knownurls :: S.Set URLString
-	, knownitems :: S.Set ItemId
+	{ dbhandle :: Maybe Db.ImportFeedDbHandle
 	, template :: Utility.Format.Format
 	}
 
 getCache :: Maybe String -> Annex Cache
 getCache opttemplate = ifM (Annex.getRead Annex.force)
-	( ret S.empty S.empty
+	( ret Nothing
 	, do
 		j <- jsonOutputEnabled
 		unless j $
 			showStartMessage (StartMessage "importfeed" (ActionItemOther (Just "gathering known urls")) (SeekInput []))
-		(us, is) <- knownItems
+		h <- Db.openDb
 		unless j
 			showEndOk
-		ret (S.fromList us) (S.fromList is)
+		ret (Just h)
 	)
   where
 	tmpl = Utility.Format.gen $ fromMaybe defaultTemplate opttemplate
-	ret us is = return $ Cache us is tmpl
-
-{- Scan all url logs and metadata logs in the branch and find urls
- - and ItemIds that are already known. -}
-knownItems :: Annex ([URLString], [ItemId])
-knownItems = Annex.Branch.overBranchFileContents select (go [] []) >>= \case
-	Just r -> return r
-	Nothing -> giveup "This repository is read-only."
-  where
-	select f
-		| isUrlLog f = Just ()
-		| isMetaDataLog f = Just ()
-		| otherwise = Nothing
-
-	go uc ic reader = reader >>= \case
-		Just ((), f, Just content)
-			| isUrlLog f -> case parseUrlLog content of
-				[] -> go uc ic reader
-				us -> go (us++uc) ic reader
-			| isMetaDataLog f ->
-				let s = currentMetaDataValues itemIdField $
-					parseCurrentMetaData content
-				in if S.null s
-					then go uc ic reader
-					else go uc (map (decodeBS . fromMetaValue) (S.toList s)++ic) reader
-			| otherwise -> go uc ic reader
-		Just ((), _, Nothing) -> go uc ic reader
-		Nothing -> return (uc, ic)
+	ret h = return $ Cache h tmpl
 
 findDownloads :: URLString -> Feed -> [ToDownload]
 findDownloads u f = catMaybes $ map mk (feedItems f)
@@ -285,14 +256,20 @@ startDownload addunlockedmatcher opts cache cv todownload = case location todown
 		, downloadmedia linkurl mediaurl mediakey
 		)
   where
-	forced = Annex.getRead Annex.force
-
 	{- Avoids downloading any items that are already known to be
-	 - associated with a file in the annex, unless forced. -}
-	checkknown url a
-		| knownitemid || S.member url (knownurls cache)
-			= ifM forced (a, nothingtodo)
-		| otherwise = a
+	 - associated with a file in the annex. -}
+	checkknown url a = case dbhandle cache of
+		Just db -> ifM (liftIO $ Db.isKnownUrl db url)
+			( nothingtodo
+			, case getItemId (item todownload) of
+				Just (_, itemid) ->
+					ifM (liftIO $ Db.isKnownItemId db (fromFeedText itemid))
+						( nothingtodo
+						, a
+						)
+				_ -> a
+			)
+		Nothing -> a
 
 	nothingtodo = recordsuccess >> stop
 
@@ -302,11 +279,6 @@ startDownload addunlockedmatcher opts cache cv todownload = case location todown
 	startdownloadenclosure url = checkknown url $ startUrlDownload cv todownload url $
 		downloadEnclosure addunlockedmatcher opts cache cv todownload url
 
-	knownitemid = case getItemId (item todownload) of
-		Just (_, itemid) ->
-			S.member (decodeBS $ fromFeedText itemid) (knownitems cache)
-		_ -> False
-
 	downloadmedia linkurl mediaurl mediakey
 		| rawOption (downloadOptions opts) = startdownloadlink
 		| otherwise = ifM (youtubeDlSupported linkurl)
@@ -555,9 +527,6 @@ extractFields i = map (uncurry extractField)
 	feedauthor = decodeBS . fromFeedText <$> getFeedAuthor (feed i)
 	itemauthor = decodeBS . fromFeedText <$> getItemAuthor (item i)
 
-itemIdField :: MetaField
-itemIdField = mkMetaFieldUnchecked "itemid"
-
 extractField :: String -> [Maybe String] -> (String, String)
 extractField k [] = (k, noneValue)
 extractField k (Just v:_)
@@ -162,10 +162,7 @@ getAnnexBranchTree (ContentIdentifierHandle h _) = H.queryDbQueue h $ do
 needsUpdateFromLog :: ContentIdentifierHandle -> Annex (Maybe (Sha, Sha))
 needsUpdateFromLog db = do
 	oldtree <- liftIO $ getAnnexBranchTree db
-	inRepo (Git.Ref.tree Annex.Branch.fullname) >>= \case
-		Just currtree | currtree /= oldtree ->
-			return $ Just (oldtree, currtree)
-		_ -> return Nothing
+	Annex.Branch.updatedFromTree oldtree
 
 {- The database should be locked for write when calling this. -}
 updateFromLog :: ContentIdentifierHandle -> (Sha, Sha) -> Annex ContentIdentifierHandle
@@ -73,18 +73,18 @@ share [mkPersist sqlSettings, mkMigrate "migrateExport"] [persistLowerCase|
 -- Files that have been exported to the remote and are present on it.
 Exported
   key Key
-  file SFilePath
+  file SByteString
   ExportedIndex key file
 -- Directories that exist on the remote, and the files that are in them.
 ExportedDirectory
-  subdir SFilePath
-  file SFilePath
+  subdir SByteString
+  file SByteString
   ExportedDirectoryIndex subdir file
 -- The content of the tree that has been exported to the remote.
 -- Not all of these files are necessarily present on the remote yet.
 ExportTree
   key Key
-  file SFilePath
+  file SByteString
   ExportTreeKeyFileIndex key file
   ExportTreeFileKeyIndex file key
 -- The tree stored in ExportTree
@@ -139,26 +139,26 @@ addExportedLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
 addExportedLocation h k el = queueDb h $ do
 	void $ insertUniqueFast $ Exported k ef
 	let edirs = map
-		(\ed -> ExportedDirectory (SFilePath (fromExportDirectory ed)) ef)
+		(\ed -> ExportedDirectory (SByteString (fromExportDirectory ed)) ef)
 		(exportDirectories el)
 	putMany edirs
   where
-	ef = SFilePath (fromExportLocation el)
+	ef = SByteString (fromExportLocation el)
 
 removeExportedLocation :: ExportHandle -> Key -> ExportLocation -> IO ()
 removeExportedLocation h k el = queueDb h $ do
 	deleteWhere [ExportedKey ==. k, ExportedFile ==. ef]
-	let subdirs = map (SFilePath . fromExportDirectory)
+	let subdirs = map (SByteString . fromExportDirectory)
 		(exportDirectories el)
 	deleteWhere [ExportedDirectoryFile ==. ef, ExportedDirectorySubdir <-. subdirs]
   where
-	ef = SFilePath (fromExportLocation el)
+	ef = SByteString (fromExportLocation el)
 
 {- Note that this does not see recently queued changes. -}
 getExportedLocation :: ExportHandle -> Key -> IO [ExportLocation]
 getExportedLocation (ExportHandle h _) k = H.queryDbQueue h $ do
 	l <- selectList [ExportedKey ==. k] []
-	return $ map (mkExportLocation . (\(SFilePath f) -> f) . exportedFile . entityVal) l
+	return $ map (mkExportLocation . (\(SByteString f) -> f) . exportedFile . entityVal) l
 
 {- Note that this does not see recently queued changes. -}
 isExportDirectoryEmpty :: ExportHandle -> ExportDirectory -> IO Bool
@@ -166,13 +166,13 @@ isExportDirectoryEmpty (ExportHandle h _) d = H.queryDbQueue h $ do
 	l <- selectList [ExportedDirectorySubdir ==. ed] []
 	return $ null l
   where
-	ed = SFilePath $ fromExportDirectory d
+	ed = SByteString $ fromExportDirectory d
 
 {- Get locations in the export that might contain a key. -}
 getExportTree :: ExportHandle -> Key -> IO [ExportLocation]
 getExportTree (ExportHandle h _) k = H.queryDbQueue h $ do
 	l <- selectList [ExportTreeKey ==. k] []
-	return $ map (mkExportLocation . (\(SFilePath f) -> f) . exportTreeFile . entityVal) l
+	return $ map (mkExportLocation . (\(SByteString f) -> f) . exportTreeFile . entityVal) l
 
 {- Get keys that might be currently exported to a location.
  -
@@ -183,19 +183,19 @@ getExportTreeKey (ExportHandle h _) el = H.queryDbQueue h $ do
 	map (exportTreeKey . entityVal)
 		<$> selectList [ExportTreeFile ==. ef] []
   where
-	ef = SFilePath (fromExportLocation el)
+	ef = SByteString (fromExportLocation el)
 
 addExportTree :: ExportHandle -> Key -> ExportLocation -> IO ()
 addExportTree h k loc = queueDb h $
 	void $ insertUniqueFast $ ExportTree k ef
   where
-	ef = SFilePath (fromExportLocation loc)
+	ef = SByteString (fromExportLocation loc)
 
 removeExportTree :: ExportHandle -> Key -> ExportLocation -> IO ()
 removeExportTree h k loc = queueDb h $
 	deleteWhere [ExportTreeKey ==. k, ExportTreeFile ==. ef]
  where
-	ef = SFilePath (fromExportLocation loc)
+	ef = SByteString (fromExportLocation loc)
 
 -- An action that is passed the old and new values that were exported,
 -- and updates state.
Database/ImportFeed.hs (new file, 211 lines)
@@ -0,0 +1,211 @@
{- Sqlite database of known urls, and another of known itemids,
 - for use by git-annex importfeed.
 -
 - Copyright 2023 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE CPP #-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TypeOperators, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts, EmptyDataDecls #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE DataKinds, FlexibleInstances #-}
{-# LANGUAGE UndecidableInstances #-}
#if MIN_VERSION_persistent_template(2,8,0)
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE StandaloneDeriving #-}
#endif

module Database.ImportFeed (
	ImportFeedDbHandle,
	openDb,
	closeDb,
	isKnownUrl,
	isKnownItemId,
) where

import Database.Types
import qualified Database.Queue as H
import Database.Init
import Database.Utility
import Annex.Locations
import Annex.Common hiding (delete)
import qualified Annex.Branch
import Git.Types
import Git.Sha
import Git.FilePath
import qualified Git.DiffTree as DiffTree
import Logs
import Logs.Web
import Logs.MetaData
import Types.MetaData
import Annex.MetaData.StandardFields
import Annex.LockFile
import qualified Utility.RawFilePath as R

import Database.Persist.Sql hiding (Key)
import Database.Persist.TH
import qualified System.FilePath.ByteString as P
import qualified Data.ByteString as B
import qualified Data.Set as S

data ImportFeedDbHandle = ImportFeedDbHandle H.DbQueue

-- Note on indexes: ContentIndentifiersKeyRemoteCidIndex etc are really
-- uniqueness constraints, which cause sqlite to automatically add indexes.
-- So when adding indexes, have to take care to only add ones that work as
-- uniqueness constraints. (Unfortunately persistent does not support indexes
-- that are not uniqueness constraints;
-- https://github.com/yesodweb/persistent/issues/109)
share [mkPersist sqlSettings, mkMigrate "migrateImportFeed"] [persistLowerCase|
KnownUrls
  url SByteString
  UniqueUrl url
KnownItemIds
  itemid SByteString
  UniqueItemId itemid
-- The last git-annex branch tree sha that was used to update
-- KnownUrls and KnownItemIds
AnnexBranch
  tree SSha
  UniqueTree tree
|]

{- Opens the database, creating it if it doesn't exist yet.
 - Updates the database from the git-annex branch. -}
openDb :: Annex ImportFeedDbHandle
openDb = do
	dbdir <- calcRepo' gitAnnexImportFeedDbDir
	let db = dbdir P.</> "db"
	isnew <- liftIO $ not <$> R.doesPathExist db
	when isnew $
		initDb db $ void $
			runMigrationSilent migrateImportFeed
	dbh <- liftIO $ H.openDbQueue db "known_urls"
	let h = ImportFeedDbHandle dbh
	needsUpdateFromLog h >>= \case
		Nothing -> return ()
		Just v -> do
			lck <- calcRepo' gitAnnexImportFeedDbLock
			withExclusiveLock lck $
				updateFromLog h v
	return h

closeDb :: ImportFeedDbHandle -> Annex ()
closeDb (ImportFeedDbHandle h) = liftIO $ H.closeDbQueue h

isKnownUrl :: ImportFeedDbHandle -> URLString -> IO Bool
isKnownUrl (ImportFeedDbHandle h) u =
	H.queryDbQueue h $ do
		l <- selectList
			[ KnownUrlsUrl ==. SByteString (encodeBS u)
			] []
		return $ not (null l)

isKnownItemId :: ImportFeedDbHandle -> B.ByteString -> IO Bool
isKnownItemId (ImportFeedDbHandle h) i =
	H.queryDbQueue h $ do
		l <- selectList
			[ KnownItemIdsItemid ==. SByteString i
			] []
		return $ not (null l)

recordKnownUrl :: ImportFeedDbHandle -> URLString -> IO ()
recordKnownUrl h u = queueDb h $
	void $ insertUniqueFast $ KnownUrls $ SByteString $ encodeBS u

recordKnownItemId :: ImportFeedDbHandle -> SByteString -> IO ()
recordKnownItemId h i = queueDb h $
	void $ insertUniqueFast $ KnownItemIds i

recordAnnexBranchTree :: ImportFeedDbHandle -> Sha -> IO ()
recordAnnexBranchTree h s = queueDb h $ do
	deleteWhere ([] :: [Filter AnnexBranch])
	void $ insertUniqueFast $ AnnexBranch $ toSSha s

getAnnexBranchTree :: ImportFeedDbHandle -> IO Sha
getAnnexBranchTree (ImportFeedDbHandle h) = H.queryDbQueue h $ do
	l <- selectList ([] :: [Filter AnnexBranch]) []
	case l of
		(s:[]) -> return $ fromSSha $ annexBranchTree $ entityVal s
		_ -> return emptyTree

queueDb :: ImportFeedDbHandle -> SqlPersistM () -> IO ()
queueDb (ImportFeedDbHandle h) = H.queueDb h checkcommit
  where
	-- commit queue after 10000 changes
	checkcommit sz _lastcommittime
		| sz > 10000 = return True
		| otherwise = return False

{- Check if the git-annex branch has been updated and the database needs
 - to be updated with any new information from it. -}
needsUpdateFromLog :: ImportFeedDbHandle -> Annex (Maybe (Sha, Sha))
needsUpdateFromLog db = do
	oldtree <- liftIO $ getAnnexBranchTree db
	Annex.Branch.updatedFromTree oldtree

{- The database should be locked for write when calling this. -}
updateFromLog :: ImportFeedDbHandle -> (Sha, Sha) -> Annex ()
updateFromLog db@(ImportFeedDbHandle h) (oldtree, currtree)
	| oldtree == emptyTree = do
		scanbranch
		out
	| otherwise = do
		scandiff
		out
  where
	out = liftIO $ do
		recordAnnexBranchTree db currtree
		H.flushDbQueue h

	knownitemids s = liftIO $ forM_ (S.toList s) $
		recordKnownItemId db . SByteString . fromMetaValue

	knownurls us = liftIO $ forM_ us $
		recordKnownUrl db

	scandiff = do
		(l, cleanup) <- inRepo $
			DiffTree.diffTreeRecursive oldtree currtree
		mapM_ godiff l
		void $ liftIO $ cleanup

	godiff ti = do
		let f = getTopFilePath (DiffTree.file ti)
		case extLogFileKey urlLogExt f of
			Just k -> do
				knownurls =<< getUrls k
			Nothing -> case extLogFileKey metaDataLogExt f of
				Just k -> do
					m <- getCurrentMetaData k
					knownitemids (currentMetaDataValues itemIdField m)
				Nothing -> return ()

	-- When initially populating the database, this
	-- is faster than diffing from the empty tree
	-- and looking up every log file.
	scanbranch = Annex.Branch.overBranchFileContents toscan goscan >>= \case
		Just () -> return ()
		Nothing -> scandiff

	toscan f
		| isUrlLog f = Just ()
		| isMetaDataLog f = Just ()
		| otherwise = Nothing

	goscan reader = reader >>= \case
		Just ((), f, Just content)
			| isUrlLog f -> do
				knownurls (parseUrlLog content)
				goscan reader
			| isMetaDataLog f -> do
				knownitemids $
					currentMetaDataValues itemIdField $
					parseCurrentMetaData content
				goscan reader
			| otherwise -> goscan reader
		Just ((), _, Nothing) -> goscan reader
		Nothing -> return ()
@@ -46,7 +46,7 @@ import Data.Maybe
 share [mkPersist sqlSettings, mkMigrate "migrateKeysDb"] [persistLowerCase|
 Associated
   key Key
-  file SFilePath
+  file SByteString
   KeyFileIndex key file
   FileKeyIndex file key
 Content
@@ -87,7 +87,7 @@ addAssociatedFile k f = queueDb $
 	(Associated k af)
 	[AssociatedFile =. af, AssociatedKey =. k]
  where
-	af = SFilePath (getTopFilePath f)
+	af = SByteString (getTopFilePath f)
 
 -- Faster than addAssociatedFile, but only safe to use when the file
 -- was not associated with a different key before, as it does not delete
@@ -96,14 +96,14 @@ newAssociatedFile :: Key -> TopFilePath -> WriteHandle -> IO ()
 newAssociatedFile k f = queueDb $
 	insert_ $ Associated k af
  where
-	af = SFilePath (getTopFilePath f)
+	af = SByteString (getTopFilePath f)
 
 {- Note that the files returned were once associated with the key, but
 - some of them may not be any longer. -}
 getAssociatedFiles :: Key -> ReadHandle -> IO [TopFilePath]
 getAssociatedFiles k = readDb $ do
 	l <- selectList [AssociatedKey ==. k] []
-	return $ map (asTopFilePath . (\(SFilePath f) -> f) . associatedFile . entityVal) l
+	return $ map (asTopFilePath . (\(SByteString f) -> f) . associatedFile . entityVal) l
 
 {- Gets any keys that are on record as having a particular associated file.
 - (Should be one or none.) -}
@@ -112,13 +112,13 @@ getAssociatedKey f = readDb $ do
 	l <- selectList [AssociatedFile ==. af] []
 	return $ map (associatedKey . entityVal) l
  where
-	af = SFilePath (getTopFilePath f)
+	af = SByteString (getTopFilePath f)
 
 removeAssociatedFile :: Key -> TopFilePath -> WriteHandle -> IO ()
 removeAssociatedFile k f = queueDb $
 	deleteWhere [AssociatedKey ==. k, AssociatedFile ==. af]
  where
-	af = SFilePath (getTopFilePath f)
+	af = SByteString (getTopFilePath f)
 
 addInodeCaches :: Key -> [InodeCache] -> WriteHandle -> IO ()
 addInodeCaches k is = queueDb $
@@ -79,15 +79,15 @@ instance PersistField ContentIdentifier where
 instance PersistFieldSql ContentIdentifier where
 	sqlType _ = SqlBlob
 
--- A serialized RawFilePath.
-newtype SFilePath = SFilePath S.ByteString
+-- A serialized bytestring.
+newtype SByteString = SByteString S.ByteString
 	deriving (Eq, Show)
 
-instance PersistField SFilePath where
-	toPersistValue (SFilePath b) = toPersistValue b
-	fromPersistValue v = SFilePath <$> fromPersistValue v
+instance PersistField SByteString where
+	toPersistValue (SByteString b) = toPersistValue b
+	fromPersistValue v = SByteString <$> fromPersistValue v
 
-instance PersistFieldSql SFilePath where
+instance PersistFieldSql SByteString where
 	sqlType _ = SqlBlob
 
 -- A serialized git Sha
@@ -1,6 +1,5 @@
 * [[metadata]] for views
 * [[todo/cache_key_info]]
-* [[todo/importfeed_needs_more_memory_the_more_urls_there_are]]
 
 What do all these have in common? They could all be improved by
 using some kind of database to locally store the information in an
@@ -15,3 +15,5 @@ branch is changed. --[[Joey]]
 > significantly slow in large repos. So I think worth doing.
 
 [[!tag confirmed]]
+
+> [[done]] --[[Joey]]
@@ -737,6 +737,7 @@ Executable git-annex
     Database.Export
     Database.Fsck
     Database.Handle
+    Database.ImportFeed
     Database.Init
     Database.Keys
     Database.Keys.Handle