git-annex/Logs/Web.hs
Joey Hess 36f0bdcd57
add annex.alwayscompact
Added annex.alwayscompact setting which can be unset to speed up writes to
the git-annex branch in some cases.

Sponsored-by: Dartmouth College's DANDI project
2022-07-18 16:39:19 -04:00

132 lines
3.9 KiB
Haskell

{- Web url logs.
-
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
module Logs.Web (
URLString,
getUrls,
getUrlsWithPrefix,
setUrlPresent,
setUrlMissing,
Downloader(..),
getDownloader,
setDownloader,
setDownloader',
setTempUrl,
removeTempUrl,
parseUrlLog,
) where
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
import Annex.Common
import qualified Annex
import Logs
import Logs.Presence
import Logs.Location
import Utility.Url
import Annex.UUID
import qualified Annex.Branch
import qualified Types.Remote as Remote
{- Gets all urls that a key might be available from. -}
getUrls :: Key -> Annex [URLString]
getUrls key = do
config <- Annex.getGitConfig
l <- go $ urlLogFile config key : oldurlLogs config key
tmpl <- Annex.getState (maybeToList . M.lookup key . Annex.tempurls)
return (tmpl ++ l)
where
go [] = return []
go (l:ls) = do
us <- currentLogInfo l
if null us
then go ls
else return $ map decodeUrlLogInfo us
getUrlsWithPrefix :: Key -> String -> Annex [URLString]
getUrlsWithPrefix key prefix = filter (prefix `isPrefixOf`)
. map (fst . getDownloader)
<$> getUrls key
setUrlPresent :: Key -> URLString -> Annex ()
setUrlPresent key url = do
-- Avoid reading the url log when not compacting, for speed.
us <- ifM (annexAlwaysCompact <$> Annex.getGitConfig)
( getUrls key
, pure mempty
)
unless (url `elem` us) $ do
config <- Annex.getGitConfig
addLog (Annex.Branch.RegardingUUID []) (urlLogFile config key)
InfoPresent (LogInfo (encodeBS url))
-- If the url does not have an OtherDownloader, it must be present
-- in the web.
case snd (getDownloader url) of
OtherDownloader -> return ()
_ -> logChange key webUUID InfoPresent
setUrlMissing :: Key -> URLString -> Annex ()
setUrlMissing key url = do
-- Avoid making any changes if the url was not registered.
us <- getUrls key
when (url `elem` us) $ do
config <- Annex.getGitConfig
addLog (Annex.Branch.RegardingUUID []) (urlLogFile config key)
InfoMissing (LogInfo (encodeBS url))
-- If the url was a web url and none of the remaining urls
-- for the key are web urls, the key must not be present
-- in the web.
when (isweb url && null (filter isweb $ filter (/= url) us)) $
logChange key webUUID InfoMissing
where
isweb u = case snd (getDownloader u) of
OtherDownloader -> False
_ -> True
setTempUrl :: Key -> URLString -> Annex ()
setTempUrl key url = Annex.changeState $ \s ->
s { Annex.tempurls = M.insert key url (Annex.tempurls s) }
removeTempUrl :: Key -> Annex ()
removeTempUrl key = Annex.changeState $ \s ->
s { Annex.tempurls = M.delete key (Annex.tempurls s) }
data Downloader = WebDownloader | YoutubeDownloader | QuviDownloader | OtherDownloader
deriving (Eq, Show, Enum, Bounded)
{- To keep track of how an url is downloaded, it's mangled slightly in
- the log, with a prefix indicating when a Downloader is used. -}
setDownloader :: URLString -> Downloader -> String
setDownloader u WebDownloader = u
setDownloader u QuviDownloader = "quvi:" ++ u
setDownloader u YoutubeDownloader = "yt:" ++ u
setDownloader u OtherDownloader = ":" ++ u
setDownloader' :: URLString -> Remote -> String
setDownloader' u r
| Remote.uuid r == webUUID = setDownloader u WebDownloader
| otherwise = setDownloader u OtherDownloader
getDownloader :: URLString -> (URLString, Downloader)
getDownloader u = case separate (== ':') u of
("yt", u') -> (u', YoutubeDownloader)
-- quvi is not used any longer; youtube-dl should be able to handle
-- all urls it did.
("quvi", u') -> (u', YoutubeDownloader)
("", u') -> (u', OtherDownloader)
_ -> (u, WebDownloader)
decodeUrlLogInfo :: LogInfo -> URLString
decodeUrlLogInfo = decodeBS . fromLogInfo
{- Parses the content of an url log file, returning the urls that are
- currently recorded. -}
parseUrlLog :: L.ByteString -> [URLString]
parseUrlLog = map decodeUrlLogInfo . getLog