speed up populating the importfeed database
Avoid conversion from ByteString to String for urls that will just be converted right back to ByteString to go into the database. Also, setTempUrl is not used by importfeed, so avoid checking for temp urls in this code path. This benchmarks as only a small improvement: from 2.99s to 2.78s when populating a database with 33k urls. Note that it does not seem worth replacing URLString with URLByteString generally, because the ways urls are used all entail either parseURI, which takes a string, or passing a parameter to e.g. curl, which is also currently a string. Sponsored-by: Leon Schuermann on Patreon
This commit is contained in:
parent
aaeadc422a
commit
c9866d2164
2 changed files with 19 additions and 12 deletions
|
@ -112,9 +112,9 @@ isKnownItemId (ImportFeedDbHandle h) i =
|
||||||
] []
|
] []
|
||||||
return $ not (null l)
|
return $ not (null l)
|
||||||
|
|
||||||
recordKnownUrl :: ImportFeedDbHandle -> URLString -> IO ()
|
recordKnownUrl :: ImportFeedDbHandle -> URLByteString -> IO ()
|
||||||
recordKnownUrl h u = queueDb h $
|
recordKnownUrl h u = queueDb h $
|
||||||
void $ insertUniqueFast $ KnownUrls $ SByteString $ encodeBS u
|
void $ insertUniqueFast $ KnownUrls $ SByteString u
|
||||||
|
|
||||||
recordKnownItemId :: ImportFeedDbHandle -> SByteString -> IO ()
|
recordKnownItemId :: ImportFeedDbHandle -> SByteString -> IO ()
|
||||||
recordKnownItemId h i = queueDb h $
|
recordKnownItemId h i = queueDb h $
|
||||||
|
@ -177,7 +177,7 @@ updateFromLog db@(ImportFeedDbHandle h) (oldtree, currtree)
|
||||||
let f = getTopFilePath (DiffTree.file ti)
|
let f = getTopFilePath (DiffTree.file ti)
|
||||||
case extLogFileKey urlLogExt f of
|
case extLogFileKey urlLogExt f of
|
||||||
Just k -> do
|
Just k -> do
|
||||||
knownurls =<< getUrls k
|
knownurls =<< getUrls' k
|
||||||
Nothing -> case extLogFileKey metaDataLogExt f of
|
Nothing -> case extLogFileKey metaDataLogExt f of
|
||||||
Just k -> do
|
Just k -> do
|
||||||
m <- getCurrentMetaData k
|
m <- getCurrentMetaData k
|
||||||
|
|
25
Logs/Web.hs
25
Logs/Web.hs
|
@ -1,6 +1,6 @@
|
||||||
{- Web url logs.
|
{- Web url logs.
|
||||||
-
|
-
|
||||||
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -9,7 +9,9 @@
|
||||||
|
|
||||||
module Logs.Web (
|
module Logs.Web (
|
||||||
URLString,
|
URLString,
|
||||||
|
URLByteString,
|
||||||
getUrls,
|
getUrls,
|
||||||
|
getUrls',
|
||||||
getUrlsWithPrefix,
|
getUrlsWithPrefix,
|
||||||
setUrlPresent,
|
setUrlPresent,
|
||||||
setUrlMissing,
|
setUrlMissing,
|
||||||
|
@ -23,6 +25,7 @@ module Logs.Web (
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
import qualified Data.ByteString as S
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
|
@ -35,20 +38,27 @@ import Annex.UUID
|
||||||
import qualified Annex.Branch
|
import qualified Annex.Branch
|
||||||
import qualified Types.Remote as Remote
|
import qualified Types.Remote as Remote
|
||||||
|
|
||||||
|
type URLByteString = S.ByteString
|
||||||
|
|
||||||
{- Gets all urls that a key might be available from. -}
|
{- Gets all urls that a key might be available from. -}
|
||||||
getUrls :: Key -> Annex [URLString]
|
getUrls :: Key -> Annex [URLString]
|
||||||
getUrls key = do
|
getUrls key = do
|
||||||
config <- Annex.getGitConfig
|
l <- map decodeBS <$> getUrls' key
|
||||||
l <- go $ urlLogFile config key : oldurlLogs config key
|
|
||||||
tmpl <- Annex.getState (maybeToList . M.lookup key . Annex.tempurls)
|
tmpl <- Annex.getState (maybeToList . M.lookup key . Annex.tempurls)
|
||||||
return (tmpl ++ l)
|
return (tmpl ++ l)
|
||||||
|
|
||||||
|
{- Note that this does not include temporary urls set with setTempUrl. -}
|
||||||
|
getUrls' :: Key -> Annex [URLByteString]
|
||||||
|
getUrls' key = do
|
||||||
|
config <- Annex.getGitConfig
|
||||||
|
go $ urlLogFile config key : oldurlLogs config key
|
||||||
where
|
where
|
||||||
go [] = return []
|
go [] = return []
|
||||||
go (l:ls) = do
|
go (l:ls) = do
|
||||||
us <- currentLogInfo l
|
us <- currentLogInfo l
|
||||||
if null us
|
if null us
|
||||||
then go ls
|
then go ls
|
||||||
else return $ map decodeUrlLogInfo us
|
else return $ map fromLogInfo us
|
||||||
|
|
||||||
getUrlsWithPrefix :: Key -> String -> Annex [URLString]
|
getUrlsWithPrefix :: Key -> String -> Annex [URLString]
|
||||||
getUrlsWithPrefix key prefix = filter (prefix `isPrefixOf`)
|
getUrlsWithPrefix key prefix = filter (prefix `isPrefixOf`)
|
||||||
|
@ -123,10 +133,7 @@ getDownloader u = case separate (== ':') u of
|
||||||
("", u') -> (u', OtherDownloader)
|
("", u') -> (u', OtherDownloader)
|
||||||
_ -> (u, WebDownloader)
|
_ -> (u, WebDownloader)
|
||||||
|
|
||||||
decodeUrlLogInfo :: LogInfo -> URLString
|
|
||||||
decodeUrlLogInfo = decodeBS . fromLogInfo
|
|
||||||
|
|
||||||
{- Parses the content of an url log file, returning the urls that are
|
{- Parses the content of an url log file, returning the urls that are
|
||||||
- currently recorded. -}
|
- currently recorded. -}
|
||||||
parseUrlLog :: L.ByteString -> [URLString]
|
parseUrlLog :: L.ByteString -> [URLByteString]
|
||||||
parseUrlLog = map decodeUrlLogInfo . getLog
|
parseUrlLog = map fromLogInfo . getLog
|
||||||
|
|
Loading…
Reference in a new issue