diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index 6157efa3f0..385e23a16e 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -1,6 +1,6 @@ {- git-annex file matching - - - Copyright 2012-2024 Joey Hess + - Copyright 2012-2025 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -194,6 +194,7 @@ preferredContentTokens pcd = , ValueToken "approxlackingcopies" (usev $ limitLackingCopies "approxlackingcopies" True) , ValueToken "inbackend" (usev limitInBackend) , ValueToken "metadata" (usev limitMetaData) + , ValueToken "url" (usev limitUrl) , ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd) , ValueToken "onlyingroup" (usev $ limitOnlyInGroup $ getGroupMap pcd) , ValueToken "balanced" (usev $ limitBalanced (repoUUID pcd) (getGroupMap pcd)) diff --git a/CHANGELOG b/CHANGELOG index 7216b21fbb..e0407b3919 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -10,6 +10,8 @@ git-annex (10.20250631) UNRELEASED; urgency=medium that have experienced the above bug. * Fix symlinks generated to annexed content when in adjusted unlocked branch in a linked worktree on a filesystem not supporting symlinks. + * Add --url option and url= preferred content expression, to match + content that is recorded as present in an url. -- Joey Hess Mon, 07 Jul 2025 15:59:42 -0400 diff --git a/CmdLine/GitAnnex/Options.hs b/CmdLine/GitAnnex/Options.hs index 890f9654de..4b44edda56 100644 --- a/CmdLine/GitAnnex/Options.hs +++ b/CmdLine/GitAnnex/Options.hs @@ -348,6 +348,11 @@ keyMatchingOptions' = <> help "match files with attached metadata" <> hidden ) + , annexOption (setAnnexState . 
Limit.addUrl) $ strOption + ( long "url" <> metavar paramGlob + <> help "match files by url" + <> hidden + ) , annexFlag (setAnnexState Limit.Wanted.addWantGet) ( long "want-get" <> help "match files the local repository wants to get" diff --git a/Limit.hs b/Limit.hs index d090e09d88..1916a606d5 100644 --- a/Limit.hs +++ b/Limit.hs @@ -31,6 +31,7 @@ import Types.FileMatcher import Types.MetaData import Annex.MetaData import Logs.MetaData +import Logs.Web import Logs.Group import Logs.Unused import Logs.Location @@ -867,6 +868,26 @@ limitMetaData s = case parseMetaDataMatcher s of . S.filter matching . metaDataValues f <$> getCurrentMetaData k +addUrl :: String -> Annex () {- Adds the limit that limitUrl builds from the given glob, via addLimit. -} +addUrl = addLimit . limitUrl + +limitUrl :: MkLimit Annex {- Matches a key when any entry of getUrls for it, after taking the first component of getDownloader, matches the glob case-sensitively; only the key is needed. -} +limitUrl glob = Right $ MatchFiles + { matchAction = const $ const $ checkKey check + , matchNeedsFileName = False + , matchNeedsFileContent = False + , matchNeedsKey = True + , matchNeedsLocationLog = False + , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False + , matchDesc = "url" =? glob + } + where + check k = any (matchGlob cglob) + . map (fst . getDownloader) + <$> getUrls k + cglob = compileGlob glob CaseSensitive (GlobFilePath False) -- memoized (does not depend on k) + addAccessedWithin :: Duration -> Annex () addAccessedWithin duration = do now <- liftIO getPOSIXTime diff --git a/doc/git-annex-matching-options.mdwn b/doc/git-annex-matching-options.mdwn index ea29f98848..cf964cc71d 100644 --- a/doc/git-annex-matching-options.mdwn +++ b/doc/git-annex-matching-options.mdwn @@ -178,6 +178,11 @@ in either of two repositories. (Note that you will need to quote the second parameter to avoid the shell doing redirection.) +* `--url=glob` + + Matches when the content is recorded as being present in an url that + matches the glob. 
+ * `--want-get` Matches only when the preferred content settings for the local repository diff --git a/doc/git-annex-preferred-content.mdwn b/doc/git-annex-preferred-content.mdwn index 52c6ff225e..6b9fc521ac 100644 --- a/doc/git-annex-preferred-content.mdwn +++ b/doc/git-annex-preferred-content.mdwn @@ -166,6 +166,11 @@ content not being configured. To match PDFs with between 100 and 200 pages (assuming something has set that metadata), use `metadata=pagecount>=100 and metadata=pagecount<=200` +* `url=glob` + + Matches when the content is recorded as being present in an url that + matches the glob. + * `present` Makes content be wanted if it's present, but not otherwise. diff --git a/doc/todo/match_on_url.mdwn b/doc/todo/match_on_url.mdwn index 6623debbed..5ef885e02d 100644 --- a/doc/todo/match_on_url.mdwn +++ b/doc/todo/match_on_url.mdwn @@ -10,3 +10,5 @@ expression if adding that. An alternative way could be to populate a metadata field with the url, if that were done without increasing the size of the git repository. --[[Joey]] + +> [[done]] --[[Joey]]