2011-10-15 20:25:51 +00:00
|
|
|
{- Web url logs.
|
|
|
|
-
|
2020-07-14 16:44:35 +00:00
|
|
|
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
2011-10-15 20:25:51 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2011-10-15 20:25:51 +00:00
|
|
|
-}
|
|
|
|
|
2020-07-14 16:44:35 +00:00
|
|
|
{-# LANGUAGE BangPatterns #-}
|
|
|
|
|
2011-10-15 20:25:51 +00:00
|
|
|
module Logs.Web (
|
|
|
|
URLString,
|
2012-11-29 21:01:07 +00:00
|
|
|
getUrls,
|
2014-12-08 17:32:27 +00:00
|
|
|
getUrlsWithPrefix,
|
2011-10-15 20:36:56 +00:00
|
|
|
setUrlPresent,
|
2012-11-29 21:01:07 +00:00
|
|
|
setUrlMissing,
|
2020-07-14 18:35:26 +00:00
|
|
|
withKnownUrls,
|
2013-08-22 22:25:21 +00:00
|
|
|
Downloader(..),
|
|
|
|
getDownloader,
|
|
|
|
setDownloader,
|
2016-01-19 19:55:32 +00:00
|
|
|
setDownloader',
|
2014-12-08 23:14:24 +00:00
|
|
|
setTempUrl,
|
|
|
|
removeTempUrl,
|
2011-10-15 20:25:51 +00:00
|
|
|
) where
|
|
|
|
|
2014-12-08 23:14:24 +00:00
|
|
|
import qualified Data.Map as M
|
2013-07-28 19:27:36 +00:00
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2014-12-08 23:14:24 +00:00
|
|
|
import qualified Annex
|
2013-08-29 22:51:22 +00:00
|
|
|
import Logs
|
2011-10-15 20:25:51 +00:00
|
|
|
import Logs.Presence
|
|
|
|
import Logs.Location
|
2013-07-28 19:27:36 +00:00
|
|
|
import qualified Annex.Branch
|
2020-07-14 16:44:35 +00:00
|
|
|
import qualified Git.LsTree
|
|
|
|
import Git.CatFile (catObjectStreamLsTree)
|
|
|
|
import Git.FilePath
|
2014-12-08 17:40:15 +00:00
|
|
|
import Utility.Url
|
2016-01-19 19:55:32 +00:00
|
|
|
import Annex.UUID
|
|
|
|
import qualified Types.Remote as Remote
|
2011-10-15 20:25:51 +00:00
|
|
|
|
2011-10-15 20:36:56 +00:00
|
|
|
{- Lists the urls that a key might be available from.
 -
 - A temporary url held for the key in the Annex state, if any, comes
 - first. It is followed by the entries of the first candidate log file
 - (urlLogFile, then each of oldurlLogs in turn) that has any current
 - entries; later candidates are not consulted once one is non-empty. -}
getUrls :: Key -> Annex [URLString]
getUrls key = do
    config <- Annex.getGitConfig
    logged <- firstPopulated (urlLogFile config key : oldurlLogs config key)
    temp <- Annex.getState (maybeToList . M.lookup key . Annex.tempurls)
    return (temp ++ logged)
  where
    -- Walk the candidate log files in order, stopping at the first one
    -- that yields any entries.
    firstPopulated [] = return []
    firstPopulated (logfile:rest) = do
        infos <- currentLogInfo logfile
        case infos of
            [] -> firstPopulated rest
            _ -> return (map (decodeBS . fromLogInfo) infos)
|
2011-10-15 20:25:51 +00:00
|
|
|
|
2014-12-08 17:32:27 +00:00
|
|
|
{- Like getUrls, but each url first has its downloader prefix removed
 - (via getDownloader), and only the urls that then start with the given
 - prefix are returned. -}
getUrlsWithPrefix :: Key -> String -> Annex [URLString]
getUrlsWithPrefix key prefix = do
    urls <- getUrls key
    return [ u | (u, _) <- map getDownloader urls, prefix `isPrefixOf` u ]
|
2014-12-08 17:32:27 +00:00
|
|
|
|
2018-10-04 21:33:25 +00:00
|
|
|
{- Records that a key can be obtained from an url.
 -
 - The url is added to the key's url log unless getUrls already lists
 - it. Independently of that, unless the url uses an OtherDownloader,
 - the key is logged as present for webUUID. -}
setUrlPresent :: Key -> URLString -> Annex ()
setUrlPresent key url = do
    known <- getUrls key
    when (url `notElem` known) $ do
        config <- Annex.getGitConfig
        entry <- logNow InfoPresent (LogInfo (encodeBS url))
        addLog (urlLogFile config key) entry
    -- An url that is not handled by an OtherDownloader must be present
    -- in the web.
    unless (downloader == OtherDownloader) $
        logChange key webUUID InfoPresent
  where
    downloader = snd (getDownloader url)
|
2011-10-15 20:36:56 +00:00
|
|
|
|
2018-10-04 21:33:25 +00:00
|
|
|
{- Records that a key can no longer be obtained from an url.
 -
 - An InfoMissing entry for the url is always added to the key's url
 - log. When the url was a web url, and getUrls afterwards lists no web
 - url for the key, the key is also logged as missing for webUUID. -}
setUrlMissing :: Key -> URLString -> Annex ()
setUrlMissing key url = do
    config <- Annex.getGitConfig
    entry <- logNow InfoMissing (LogInfo (encodeBS url))
    addLog (urlLogFile config key) entry
    -- Once the last web url (one without an OtherDownloader) of the key
    -- is gone, the key must not be present in the web.
    when (isWebUrl url) $ do
        remaining <- getUrls key
        unless (any isWebUrl remaining) $
            logChange key webUUID InfoMissing
  where
    -- Every downloader other than OtherDownloader counts as the web.
    isWebUrl u = snd (getDownloader u) /= OtherDownloader
|
2013-07-28 19:27:36 +00:00
|
|
|
|
|
|
|
{- Finds all known urls.
 -
 - The supplied action is handed an Annex action that, each time it is
 - run, yields the next key together with its (non-empty) list of urls,
 - or Nothing once the underlying reader is exhausted. The cleanup
 - action returned by lsTree is run when the streaming finishes, also
 - when it ends with an exception. -}
withKnownUrls :: (Annex (Maybe (Key, [URLString])) -> Annex a) -> Annex a
withKnownUrls a = do
    -- The tree of the git-annex branch is inspected below, so make sure
    -- any journalled changes have been committed to the branch first.
    _ <- Annex.Branch.update
    Annex.Branch.commit =<< Annex.Branch.commitMessage
    (treeitems, cleanup) <- inRepo $ Git.LsTree.lsTree
        Git.LsTree.LsTreeRecursive
        Annex.Branch.fullname
    repo <- Annex.gitRepo
    let stream = catObjectStreamLsTree treeitems wanted repo $ \reader ->
            a (nextKnown reader)
    stream `finally` void (liftIO cleanup)
  where
    -- Selects the tree items to stream: those for which urlLogFileKey
    -- produces a key from the item's file path.
    wanted = urlLogFileKey . getTopFilePath . Git.LsTree.file

    -- Keep reading until an item with content and at least one url
    -- turns up, skipping everything else.
    nextKnown reader = liftIO reader >>= \case
        Nothing -> return Nothing
        Just (_, Nothing) -> nextKnown reader
        Just (k, Just content) -> case urlsIn content of
            [] -> nextKnown reader
            us -> return (Just (k, us))

    urlsIn = map (decodeBS . fromLogInfo) . getLog
|
2013-08-22 22:25:21 +00:00
|
|
|
|
2014-12-08 23:14:24 +00:00
|
|
|
{- Stores an url for a key in the tempurls map of the Annex state,
 - replacing any url stored there for the key before. -}
setTempUrl :: Key -> URLString -> Annex ()
setTempUrl key url = Annex.changeState remember
  where
    remember st = st { Annex.tempurls = M.insert key url (Annex.tempurls st) }
|
|
|
|
|
|
|
|
{- Drops the url stored for a key, if any, from the tempurls map of the
 - Annex state. -}
removeTempUrl :: Key -> Annex ()
removeTempUrl key = Annex.changeState forget
  where
    forget st = st { Annex.tempurls = M.delete key (Annex.tempurls st) }
|
|
|
|
|
2017-11-28 21:17:40 +00:00
|
|
|
{- The downloader that is associated with a logged url. -}
data Downloader
    = WebDownloader
    | YoutubeDownloader
    | QuviDownloader
    | OtherDownloader
    deriving (Eq, Show)
|
2014-12-08 23:14:24 +00:00
|
|
|
|
|
|
|
{- Urls are stored in the log slightly mangled, so that the way an url
 - is downloaded can be recovered: a prefix names the Downloader. A
 - WebDownloader url is stored without any prefix. -}
setDownloader :: URLString -> Downloader -> String
setDownloader u downloader = prefix ++ u
  where
    prefix = case downloader of
        WebDownloader -> ""
        QuviDownloader -> "quvi:"
        YoutubeDownloader -> "yt:"
        OtherDownloader -> ":"
|
2013-08-22 22:25:21 +00:00
|
|
|
|
2016-01-19 19:55:32 +00:00
|
|
|
{- Mangles an url for the log according to the remote it belongs to:
 - the remote whose uuid is webUUID gets WebDownloader, every other
 - remote gets OtherDownloader. -}
setDownloader' :: URLString -> Remote -> String
setDownloader' u r = setDownloader u downloader
  where
    downloader
        | Remote.uuid r == webUUID = WebDownloader
        | otherwise = OtherDownloader
|
|
|
|
|
2013-08-22 22:25:21 +00:00
|
|
|
{- Splits a logged url at its first ':' to recover the Downloader and
 - the url without the downloader prefix. When the part before the ':'
 - is not a known downloader prefix, the url is returned unchanged as a
 - WebDownloader url. -}
getDownloader :: URLString -> (URLString, Downloader)
getDownloader u
    -- quvi is not used any longer; youtube-dl should be able to handle
    -- all urls it did, so both prefixes map to YoutubeDownloader.
    | prefix `elem` ["yt", "quvi"] = (rest, YoutubeDownloader)
    -- An empty prefix marks an OtherDownloader url.
    | null prefix = (rest, OtherDownloader)
    | otherwise = (u, WebDownloader)
  where
    (prefix, rest) = separate (== ':') u
|