Added annex.alwayscompact setting which can be unset to speed up writes to the git-annex branch in some cases. Sponsored-by: Dartmouth College's DANDI project
		
			
				
	
	
		
			132 lines
		
	
	
	
		
			3.9 KiB
			
		
	
	
	
		
			Haskell
		
	
	
	
	
	
			
		
		
	
	
			132 lines
		
	
	
	
		
			3.9 KiB
			
		
	
	
	
		
			Haskell
		
	
	
	
	
	
{- Web url logs.
 -
 - Copyright 2011-2021 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}
 | 
						|
 | 
						|
{-# LANGUAGE BangPatterns #-}
 | 
						|
 | 
						|
module Logs.Web
	( URLString
	, getUrls
	, getUrlsWithPrefix
	, setUrlPresent
	, setUrlMissing
	, Downloader(..)
	, getDownloader
	, setDownloader
	, setDownloader'
	, setTempUrl
	, removeTempUrl
	, parseUrlLog
	) where
 | 
						|
 | 
						|
import qualified Data.Map as M
 | 
						|
import qualified Data.ByteString.Lazy as L
 | 
						|
 | 
						|
import Annex.Common
 | 
						|
import qualified Annex
 | 
						|
import Logs
 | 
						|
import Logs.Presence
 | 
						|
import Logs.Location
 | 
						|
import Utility.Url
 | 
						|
import Annex.UUID
 | 
						|
import qualified Annex.Branch
 | 
						|
import qualified Types.Remote as Remote
 | 
						|
 | 
						|
{- Gets all urls that a key might be available from.
 -
 - Any temporary url registered for the key in the Annex state is listed
 - first. It is followed by the urls from the first log file that has any
 - entries, trying the file given by urlLogFile and then those given by
 - oldurlLogs, in order. -}
getUrls :: Key -> Annex [URLString]
getUrls key = do
	config <- Annex.getGitConfig
	logged <- firstPopulated (urlLogFile config key : oldurlLogs config key)
	temp <- Annex.getState (maybeToList . M.lookup key . Annex.tempurls)
	return (temp ++ logged)
  where
	-- Stops at the first log that yields anything; later logs are
	-- not consulted.
	firstPopulated [] = return []
	firstPopulated (f:fs) = do
		infos <- currentLogInfo f
		case infos of
			[] -> firstPopulated fs
			_ -> return (map decodeUrlLogInfo infos)
 | 
						|
 | 
						|
{- Gets the urls of a key that start with the given prefix.
 -
 - The prefix is compared against the url part returned by getDownloader,
 - not against the raw logged value, and that url part is what is
 - returned. -}
getUrlsWithPrefix :: Key -> String -> Annex [URLString]
getUrlsWithPrefix key prefix = do
	logged <- getUrls key
	return
		[ u
		| u <- map (fst . getDownloader) logged
		, prefix `isPrefixOf` u
		]
 | 
						|
 | 
						|
{- Records that an url is present for a key.
 -
 - When annexAlwaysCompact is set in the git config, the existing urls
 - are read first and the log is only written when the url is not already
 - listed. Otherwise the log entry is always added. -}
setUrlPresent :: Key -> URLString -> Annex ()
setUrlPresent key url = do
	-- Avoid reading the url log when not compacting, for speed.
	compacting <- annexAlwaysCompact <$> Annex.getGitConfig
	known <- if compacting
		then getUrls key
		else pure mempty
	unless (url `elem` known) $ do
		logfile <- flip urlLogFile key <$> Annex.getGitConfig
		addLog (Annex.Branch.RegardingUUID []) logfile
			InfoPresent (LogInfo (encodeBS url))
	-- If the url does not have an OtherDownloader, it must be present
	-- in the web.
	when (snd (getDownloader url) /= OtherDownloader) $
		logChange key webUUID InfoPresent
 | 
						|
 | 
						|
{- Records that an url is no longer present for a key.
 -
 - Does nothing unless the url is among those returned by getUrls. -}
setUrlMissing :: Key -> URLString -> Annex ()
setUrlMissing key url = do
	-- Avoid making any changes if the url was not registered.
	registered <- getUrls key
	when (url `elem` registered) $ do
		config <- Annex.getGitConfig
		addLog (Annex.Branch.RegardingUUID []) (urlLogFile config key)
			InfoMissing (LogInfo (encodeBS url))
		-- If the url was a web url and none of the remaining urls
		-- for the key are web urls, the key must not be present
		-- in the web.
		let remaining = filter (/= url) registered
		when (isweb url && not (any isweb remaining)) $
			logChange key webUUID InfoMissing
  where
	-- Anything that is not handled by an OtherDownloader counts as
	-- a web url.
	isweb u = snd (getDownloader u) /= OtherDownloader
 | 
						|
 | 
						|
{- Registers a temporary url for a key in the Annex state, replacing any
 - temporary url previously registered for that key. -}
setTempUrl :: Key -> URLString -> Annex ()
setTempUrl key url = Annex.changeState register
  where
	register st = st
		{ Annex.tempurls = M.insert key url (Annex.tempurls st) }
 | 
						|
 | 
						|
{- Forgets the temporary url registered for a key in the Annex state,
 - if there is one. -}
removeTempUrl :: Key -> Annex ()
removeTempUrl key = Annex.changeState forget
  where
	forget st = st
		{ Annex.tempurls = M.delete key (Annex.tempurls st) }
 | 
						|
 | 
						|
{- The ways an url can be marked as being downloaded.
 -
 - The constructor order determines the derived Enum and Bounded
 - instances. -}
data Downloader
	= WebDownloader
	| YoutubeDownloader
	| QuviDownloader
	| OtherDownloader
	deriving (Eq, Show, Enum, Bounded)
 | 
						|
 | 
						|
{- To keep track of how an url is downloaded, it's mangled slightly in
 - the log, with a prefix indicating when a Downloader is used.
 -
 - A WebDownloader url is left unchanged; every other Downloader gets
 - its own prefix ending in a colon. -}
setDownloader :: URLString -> Downloader -> String
setDownloader u d = marker d ++ u
  where
	marker WebDownloader = ""
	marker QuviDownloader = "quvi:"
	marker YoutubeDownloader = "yt:"
	marker OtherDownloader = ":"
 | 
						|
 | 
						|
{- Like setDownloader, but picks the Downloader based on the remote:
 - WebDownloader when the remote's uuid is webUUID, and OtherDownloader
 - for any other remote. -}
setDownloader' :: URLString -> Remote -> String
setDownloader' u r = setDownloader u downloader
  where
	downloader
		| Remote.uuid r == webUUID = WebDownloader
		| otherwise = OtherDownloader
 | 
						|
 | 
						|
{- Splits a logged url into the url proper and the Downloader indicated
 - by its prefix, which is the text before the first colon.
 -
 - An url whose prefix is not one of the recognised markers is returned
 - unchanged, as a WebDownloader url. -}
getDownloader :: URLString -> (URLString, Downloader)
getDownloader u = case separate (== ':') u of
	(marker, rest)
		| marker == "yt" -> (rest, YoutubeDownloader)
		-- quvi is not used any longer; youtube-dl should be able to
		-- handle all urls it did.
		| marker == "quvi" -> (rest, YoutubeDownloader)
		-- An empty marker is how OtherDownloader urls are logged.
		| null marker -> (rest, OtherDownloader)
	_ -> (u, WebDownloader)
 | 
						|
 | 
						|
{- Converts the content of a LogInfo into an url string. -}
decodeUrlLogInfo :: LogInfo -> URLString
decodeUrlLogInfo info = decodeBS (fromLogInfo info)
 | 
						|
 | 
						|
{- Parses the content of an url log file, returning the urls that are
 - currently recorded.
 -
 - Which entries are returned is decided by getLog; each one is then
 - decoded into an url string. -}
parseUrlLog :: L.ByteString -> [URLString]
parseUrlLog content = [ decodeUrlLogInfo info | info <- getLog content ]
 |